1 #define PETSCKSP_DLL 2 3 /* 4 3/99 Modified by Stephen Barnard to support SPAI version 3.0 5 */ 6 7 /* 8 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 9 Code written by Stephen Barnard. 10 11 Note: there is some BAD memory bleeding below! 12 13 This code needs work 14 15 1) get rid of all memory bleeding 16 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 17 rather than having the sp flag for PC_SPAI 18 19 */ 20 21 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #include "spai.h" 29 #include "matrix.h" 30 EXTERN_C_END 31 32 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 33 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 34 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 35 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *); 36 37 typedef struct { 38 39 matrix *B; /* matrix in SPAI format */ 40 matrix *BT; /* transpose of matrix in SPAI format */ 41 matrix *M; /* the approximate inverse in SPAI format */ 42 43 Mat PM; /* the approximate inverse PETSc format */ 44 45 double epsilon; /* tolerance */ 46 int nbsteps; /* max number of "improvement" steps per line */ 47 int max; /* max dimensions of is_I, q, etc. */ 48 int maxnew; /* max number of new entries per step */ 49 int block_size; /* constant block size */ 50 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 51 int verbose; /* SPAI prints timing and statistics */ 52 53 int sp; /* symmetric nonzero pattern */ 54 MPI_Comm comm_spai; /* communicator to be used with spai */ 55 } PC_SPAI; 56 57 /**********************************************************************/ 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCSetUp_SPAI" 61 static PetscErrorCode PCSetUp_SPAI(PC pc) 62 { 63 PC_SPAI *ispai = (PC_SPAI*)pc->data; 64 PetscErrorCode ierr; 65 Mat AT; 66 67 PetscFunctionBegin; 68 69 init_SPAI(); 70 71 if (ispai->sp) { 72 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 73 } else { 74 /* Use the transpose to get the column nonzero structure. */ 75 ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr); 76 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 77 ierr = MatDestroy(AT);CHKERRQ(ierr); 78 } 79 80 /* Destroy the transpose */ 81 /* Don't know how to do it. PETSc developers? */ 82 83 /* construct SPAI preconditioner */ 84 /* FILE *messages */ /* file for warning messages */ 85 /* double epsilon */ /* tolerance */ 86 /* int nbsteps */ /* max number of "improvement" steps per line */ 87 /* int max */ /* max dimensions of is_I, q, etc. */ 88 /* int maxnew */ /* max number of new entries per step */ 89 /* int block_size */ /* block_size == 1 specifies scalar elments 90 block_size == n specifies nxn constant-block elements 91 block_size == 0 specifies variable-block elements */ 92 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 93 /* cache_size == 0 indicates no caching */ 94 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 95 verbose == 1 prints timing and matrix statistics */ 96 97 ierr = bspai(ispai->B,&ispai->M, 98 stdout, 99 ispai->epsilon, 100 ispai->nbsteps, 101 ispai->max, 102 ispai->maxnew, 103 ispai->block_size, 104 ispai->cache_size, 105 ispai->verbose); CHKERRQ(ierr); 106 107 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 108 109 /* free the SPAI matrices */ 110 sp_free_matrix(ispai->B); 111 sp_free_matrix(ispai->M); 112 113 PetscFunctionReturn(0); 114 } 115 116 /**********************************************************************/ 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCApply_SPAI" 120 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 121 { 122 PC_SPAI *ispai = (PC_SPAI*)pc->data; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 /* Now using PETSc's multiply */ 127 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 /**********************************************************************/ 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "PCDestroy_SPAI" 135 static PetscErrorCode PCDestroy_SPAI(PC pc) 136 { 137 PetscErrorCode ierr; 138 PC_SPAI *ispai = (PC_SPAI*)pc->data; 139 140 PetscFunctionBegin; 141 if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);} 142 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 143 ierr = PetscFree(ispai);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 /**********************************************************************/ 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "PCView_SPAI" 151 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 152 { 153 PC_SPAI *ispai = (PC_SPAI*)pc->data; 154 PetscErrorCode ierr; 155 PetscTruth iascii; 156 157 PetscFunctionBegin; 158 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 159 if (iascii) { 160 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 169 170 } 171 PetscFunctionReturn(0); 172 } 173 174 EXTERN_C_BEGIN 175 #undef __FUNCT__ 176 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 177 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 178 { 179 PC_SPAI *ispai = (PC_SPAI*)pc->data; 180 PetscFunctionBegin; 181 ispai->epsilon = epsilon1; 182 PetscFunctionReturn(0); 183 } 184 EXTERN_C_END 185 186 /**********************************************************************/ 187 188 EXTERN_C_BEGIN 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 191 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 192 { 193 PC_SPAI *ispai = (PC_SPAI*)pc->data; 194 PetscFunctionBegin; 195 ispai->nbsteps = nbsteps1; 196 PetscFunctionReturn(0); 197 } 198 EXTERN_C_END 199 200 /**********************************************************************/ 201 202 /* added 1/7/99 g.h. */ 203 EXTERN_C_BEGIN 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCSPAISetMax_SPAI" 206 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1) 207 { 208 PC_SPAI *ispai = (PC_SPAI*)pc->data; 209 PetscFunctionBegin; 210 ispai->max = max1; 211 PetscFunctionReturn(0); 212 } 213 EXTERN_C_END 214 215 /**********************************************************************/ 216 217 EXTERN_C_BEGIN 218 #undef __FUNCT__ 219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 220 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 221 { 222 PC_SPAI *ispai = (PC_SPAI*)pc->data; 223 PetscFunctionBegin; 224 ispai->maxnew = maxnew1; 225 PetscFunctionReturn(0); 226 } 227 EXTERN_C_END 228 229 /**********************************************************************/ 230 231 EXTERN_C_BEGIN 232 #undef __FUNCT__ 233 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 234 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 235 { 236 PC_SPAI *ispai = (PC_SPAI*)pc->data; 237 PetscFunctionBegin; 238 ispai->block_size = block_size1; 239 PetscFunctionReturn(0); 240 } 241 EXTERN_C_END 242 243 /**********************************************************************/ 244 245 EXTERN_C_BEGIN 246 #undef __FUNCT__ 247 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 248 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 249 { 250 PC_SPAI *ispai = (PC_SPAI*)pc->data; 251 PetscFunctionBegin; 252 ispai->cache_size = cache_size; 253 PetscFunctionReturn(0); 254 } 255 EXTERN_C_END 256 257 /**********************************************************************/ 258 259 EXTERN_C_BEGIN 260 #undef __FUNCT__ 261 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 262 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose) 263 { 264 PC_SPAI *ispai = (PC_SPAI*)pc->data; 265 PetscFunctionBegin; 266 ispai->verbose = verbose; 267 PetscFunctionReturn(0); 268 } 269 EXTERN_C_END 270 271 /**********************************************************************/ 272 273 EXTERN_C_BEGIN 274 #undef __FUNCT__ 275 #define __FUNCT__ "PCSPAISetSp_SPAI" 276 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp) 277 { 278 PC_SPAI *ispai = (PC_SPAI*)pc->data; 279 PetscFunctionBegin; 280 ispai->sp = sp; 281 PetscFunctionReturn(0); 282 } 283 EXTERN_C_END 284 285 /* -------------------------------------------------------------------*/ 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "PCSPAISetEpsilon" 289 /*@ 290 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 291 292 Input Parameters: 293 + pc - the preconditioner 294 - eps - epsilon (default .4) 295 296 Notes: Espilon must be between 0 and 1. It controls the 297 quality of the approximation of M to the inverse of 298 A. Higher values of epsilon lead to more work, more 299 fill, and usually better preconditioners. In many 300 cases the best choice of epsilon is the one that 301 divides the total solution time equally between the 302 preconditioner and the solver. 303 304 Level: intermediate 305 306 .seealso: PCSPAI, PCSetType() 307 @*/ 308 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1) 309 { 310 PetscErrorCode ierr,(*f)(PC,double); 311 PetscFunctionBegin; 312 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr); 313 if (f) { 314 ierr = (*f)(pc,epsilon1);CHKERRQ(ierr); 315 } 316 PetscFunctionReturn(0); 317 } 318 319 /**********************************************************************/ 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "PCSPAISetNBSteps" 323 /*@ 324 PCSPAISetNBSteps - set maximum number of improvement steps per row in 325 the SPAI preconditioner 326 327 Input Parameters: 328 + pc - the preconditioner 329 - n - number of steps (default 5) 330 331 Notes: SPAI constructs to approximation to every column of 332 the exact inverse of A in a series of improvement 333 steps. The quality of the approximation is determined 334 by epsilon. If an approximation achieving an accuracy 335 of epsilon is not obtained after ns steps, SPAI simply 336 uses the best approximation constructed so far. 337 338 Level: intermediate 339 340 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 341 @*/ 342 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1) 343 { 344 PetscErrorCode ierr,(*f)(PC,int); 345 PetscFunctionBegin; 346 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr); 347 if (f) { 348 ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr); 349 } 350 PetscFunctionReturn(0); 351 } 352 353 /**********************************************************************/ 354 355 /* added 1/7/99 g.h. */ 356 #undef __FUNCT__ 357 #define __FUNCT__ "PCSPAISetMax" 358 /*@ 359 PCSPAISetMax - set the size of various working buffers in 360 the SPAI preconditioner 361 362 Input Parameters: 363 + pc - the preconditioner 364 - n - size (default is 5000) 365 366 Level: intermediate 367 368 .seealso: PCSPAI, PCSetType() 369 @*/ 370 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1) 371 { 372 PetscErrorCode ierr,(*f)(PC,int); 373 PetscFunctionBegin; 374 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr); 375 if (f) { 376 ierr = (*f)(pc,max1);CHKERRQ(ierr); 377 } 378 PetscFunctionReturn(0); 379 } 380 381 /**********************************************************************/ 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "PCSPAISetMaxNew" 385 /*@ 386 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 387 in SPAI preconditioner 388 389 Input Parameters: 390 + pc - the preconditioner 391 - n - maximum number (default 5) 392 393 Level: intermediate 394 395 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 396 @*/ 397 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1) 398 { 399 PetscErrorCode ierr,(*f)(PC,int); 400 PetscFunctionBegin; 401 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr); 402 if (f) { 403 ierr = (*f)(pc,maxnew1);CHKERRQ(ierr); 404 } 405 PetscFunctionReturn(0); 406 } 407 408 /**********************************************************************/ 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCSPAISetBlockSize" 412 /*@ 413 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 414 415 Input Parameters: 416 + pc - the preconditioner 417 - n - block size (default 1) 418 419 Notes: A block 420 size of 1 treats A as a matrix of scalar elements. A 421 block size of s > 1 treats A as a matrix of sxs 422 blocks. A block size of 0 treats A as a matrix with 423 variable sized blocks, which are determined by 424 searching for dense square diagonal blocks in A. 425 This can be very effective for finite-element 426 matrices. 427 428 SPAI will convert A to block form, use a block 429 version of the preconditioner algorithm, and then 430 convert the result back to scalar form. 431 432 In many cases the a block-size parameter other than 1 433 can lead to very significant improvement in 434 performance. 435 436 437 Level: intermediate 438 439 .seealso: PCSPAI, PCSetType() 440 @*/ 441 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1) 442 { 443 PetscErrorCode ierr,(*f)(PC,int); 444 PetscFunctionBegin; 445 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr); 446 if (f) { 447 ierr = (*f)(pc,block_size1);CHKERRQ(ierr); 448 } 449 PetscFunctionReturn(0); 450 } 451 452 /**********************************************************************/ 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "PCSPAISetCacheSize" 456 /*@ 457 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 458 459 Input Parameters: 460 + pc - the preconditioner 461 - n - cache size {0,1,2,3,4,5} (default 5) 462 463 Notes: SPAI uses a hash table to cache messages and avoid 464 redundant communication. If suggest always using 465 5. This parameter is irrelevant in the serial 466 version. 467 468 Level: intermediate 469 470 .seealso: PCSPAI, PCSetType() 471 @*/ 472 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size) 473 { 474 PetscErrorCode ierr,(*f)(PC,int); 475 PetscFunctionBegin; 476 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr); 477 if (f) { 478 ierr = (*f)(pc,cache_size);CHKERRQ(ierr); 479 } 480 PetscFunctionReturn(0); 481 } 482 483 /**********************************************************************/ 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCSPAISetVerbose" 487 /*@ 488 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 489 490 Input Parameters: 491 + pc - the preconditioner 492 - n - level (default 1) 493 494 Notes: print parameters, timings and matrix statistics 495 496 Level: intermediate 497 498 .seealso: PCSPAI, PCSetType() 499 @*/ 500 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose) 501 { 502 PetscErrorCode ierr,(*f)(PC,int); 503 PetscFunctionBegin; 504 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr); 505 if (f) { 506 ierr = (*f)(pc,verbose);CHKERRQ(ierr); 507 } 508 PetscFunctionReturn(0); 509 } 510 511 /**********************************************************************/ 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "PCSPAISetSp" 515 /*@ 516 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 517 518 Input Parameters: 519 + pc - the preconditioner 520 - n - 0 or 1 521 522 Notes: If A has a symmetric nonzero pattern use -sp 1 to 523 improve performance by eliminating some communication 524 in the parallel version. Even if A does not have a 525 symmetric nonzero pattern -sp 1 may well lead to good 526 results, but the code will not follow the published 527 SPAI algorithm exactly. 528 529 530 Level: intermediate 531 532 .seealso: PCSPAI, PCSetType() 533 @*/ 534 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp) 535 { 536 PetscErrorCode ierr,(*f)(PC,int); 537 PetscFunctionBegin; 538 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr); 539 if (f) { 540 ierr = (*f)(pc,sp);CHKERRQ(ierr); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 /**********************************************************************/ 546 547 /**********************************************************************/ 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "PCSetFromOptions_SPAI" 551 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 552 { 553 PC_SPAI *ispai = (PC_SPAI*)pc->data; 554 PetscErrorCode ierr; 555 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 556 double epsilon1; 557 PetscTruth flg; 558 559 PetscFunctionBegin; 560 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 561 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 562 if (flg) { 563 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 564 } 565 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 566 if (flg) { 567 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 568 } 569 /* added 1/7/99 g.h. */ 570 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 571 if (flg) { 572 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 573 } 574 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 575 if (flg) { 576 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 577 } 578 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 579 if (flg) { 580 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 581 } 582 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 583 if (flg) { 584 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 585 } 586 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 587 if (flg) { 588 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 589 } 590 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 591 if (flg) { 592 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 593 } 594 ierr = PetscOptionsTail();CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 /**********************************************************************/ 599 600 /*MC 601 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 602 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 603 604 Options Database Keys: 605 + -pc_spai_set_epsilon <eps> - set tolerance 606 . -pc_spai_nbstep <n> - set nbsteps 607 . -pc_spai_max <m> - set max 608 . -pc_spai_max_new <m> - set maxnew 609 . -pc_spai_block_size <n> - set block size 610 . -pc_spai_cache_size <n> - set cache size 611 . -pc_spai_sp <m> - set sp 612 - -pc_spai_set_verbose <true,false> - verbose output 613 614 Notes: This only works with AIJ matrices. 615 616 Level: beginner 617 618 Concepts: approximate inverse 619 620 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 621 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 622 PCSPAISetVerbose(), PCSPAISetSp() 623 M*/ 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "PCCreate_SPAI" 628 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc) 629 { 630 PC_SPAI *ispai; 631 PetscErrorCode ierr; 632 633 PetscFunctionBegin; 634 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 635 pc->data = (void*)ispai; 636 637 pc->ops->destroy = PCDestroy_SPAI; 638 pc->ops->apply = PCApply_SPAI; 639 pc->ops->applyrichardson = 0; 640 pc->ops->setup = PCSetUp_SPAI; 641 pc->ops->view = PCView_SPAI; 642 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 643 644 ispai->epsilon = .4; 645 ispai->nbsteps = 5; 646 ispai->max = 5000; 647 ispai->maxnew = 5; 648 ispai->block_size = 1; 649 ispai->cache_size = 5; 650 ispai->verbose = 0; 651 652 ispai->sp = 1; 653 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 654 655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 656 "PCSPAISetEpsilon_SPAI", 657 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 659 "PCSPAISetNBSteps_SPAI", 660 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 661 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 662 "PCSPAISetMax_SPAI", 663 PCSPAISetMax_SPAI);CHKERRQ(ierr); 664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 665 "PCSPAISetMaxNew_SPAI", 666 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 668 "PCSPAISetBlockSize_SPAI", 669 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 671 "PCSPAISetCacheSize_SPAI", 672 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 674 "PCSPAISetVerbose_SPAI", 675 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 677 "PCSPAISetSp_SPAI", 678 PCSPAISetSp_SPAI);CHKERRQ(ierr); 679 680 PetscFunctionReturn(0); 681 } 682 EXTERN_C_END 683 684 /**********************************************************************/ 685 686 /* 687 Converts from a PETSc matrix to an SPAI matrix 688 */ 689 #undef __FUNCT__ 690 #define __FUNCT__ "ConvertMatToMatrix" 691 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 692 { 693 matrix *M; 694 int i,j,col; 695 int row_indx; 696 int len,pe,local_indx,start_indx; 697 int *mapping; 698 PetscErrorCode ierr; 699 const int *cols; 700 const double *vals; 701 int *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend; 702 struct compressed_lines *rows; 703 704 PetscFunctionBegin; 705 706 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 708 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 709 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 710 711 /* 712 not sure why a barrier is required. commenting out 713 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 714 */ 715 716 M = new_matrix((void*)comm); 717 718 M->n = n; 719 M->bs = 1; 720 M->max_block_size = 1; 721 722 M->mnls = (int*)malloc(sizeof(int)*size); 723 M->start_indices = (int*)malloc(sizeof(int)*size); 724 M->pe = (int*)malloc(sizeof(int)*n); 725 M->block_sizes = (int*)malloc(sizeof(int)*n); 726 for (i=0; i<n; i++) M->block_sizes[i] = 1; 727 728 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 729 730 M->start_indices[0] = 0; 731 for (i=1; i<size; i++) { 732 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 733 } 734 735 M->mnl = M->mnls[M->myid]; 736 M->my_start_index = M->start_indices[M->myid]; 737 738 for (i=0; i<size; i++) { 739 start_indx = M->start_indices[i]; 740 for (j=0; j<M->mnls[i]; j++) 741 M->pe[start_indx+j] = i; 742 } 743 744 if (AT) { 745 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 746 } else { 747 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 748 } 749 750 rows = M->lines; 751 752 /* Determine the mapping from global indices to pointers */ 753 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 754 pe = 0; 755 local_indx = 0; 756 for (i=0; i<M->n; i++) { 757 if (local_indx >= M->mnls[pe]) { 758 pe++; 759 local_indx = 0; 760 } 761 mapping[i] = local_indx + M->start_indices[pe]; 762 local_indx++; 763 } 764 765 766 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 767 768 /*********************************************************/ 769 /************** Set up the row structure *****************/ 770 /*********************************************************/ 771 772 /* count number of nonzeros in every row */ 773 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 774 for (i=rstart; i<rend; i++) { 775 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 776 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 777 } 778 779 /* allocate buffers */ 780 len = 0; 781 for (i=0; i<mnl; i++) { 782 if (len < num_ptr[i]) len = num_ptr[i]; 783 } 784 785 for (i=rstart; i<rend; i++) { 786 row_indx = i-rstart; 787 len = num_ptr[row_indx]; 788 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 789 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 790 } 791 792 /* copy the matrix */ 793 for (i=rstart; i<rend; i++) { 794 row_indx = i - rstart; 795 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 796 for (j=0; j<nz; j++) { 797 col = cols[j]; 798 len = rows->len[row_indx]++; 799 rows->ptrs[row_indx][len] = mapping[col]; 800 rows->A[row_indx][len] = vals[j]; 801 } 802 rows->slen[row_indx] = rows->len[row_indx]; 803 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 804 } 805 806 807 /************************************************************/ 808 /************** Set up the column structure *****************/ 809 /*********************************************************/ 810 811 if (AT) { 812 813 /* count number of nonzeros in every column */ 814 for (i=rstart; i<rend; i++) { 815 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 816 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 817 } 818 819 /* allocate buffers */ 820 len = 0; 821 for (i=0; i<mnl; i++) { 822 if (len < num_ptr[i]) len = num_ptr[i]; 823 } 824 825 for (i=rstart; i<rend; i++) { 826 row_indx = i-rstart; 827 len = num_ptr[row_indx]; 828 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 829 } 830 831 /* copy the matrix (i.e., the structure) */ 832 for (i=rstart; i<rend; i++) { 833 row_indx = i - rstart; 834 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 835 for (j=0; j<nz; j++) { 836 col = cols[j]; 837 len = rows->rlen[row_indx]++; 838 rows->rptrs[row_indx][len] = mapping[col]; 839 } 840 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 841 } 842 } 843 844 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 845 ierr = PetscFree(mapping);CHKERRQ(ierr); 846 847 order_pointers(M); 848 M->maxnz = calc_maxnz(M); 849 850 *B = M; 851 852 PetscFunctionReturn(0); 853 } 854 855 /**********************************************************************/ 856 857 /* 858 Converts from an SPAI matrix B to a PETSc matrix PB. 859 This assumes that the the SPAI matrix B is stored in 860 COMPRESSED-ROW format. 861 */ 862 #undef __FUNCT__ 863 #define __FUNCT__ "ConvertMatrixToMat" 864 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 865 { 866 int size,rank; 867 PetscErrorCode ierr; 868 int m,n,M,N; 869 int d_nz,o_nz; 870 int *d_nnz,*o_nnz; 871 int i,k,global_row,global_col,first_diag_col,last_diag_col; 872 PetscScalar val; 873 874 PetscFunctionBegin; 875 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 876 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 877 878 m = n = B->mnls[rank]; 879 d_nz = o_nz = 0; 880 881 /* Determine preallocation for MatCreateMPIAIJ */ 882 ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr); 883 ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr); 884 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 885 first_diag_col = B->start_indices[rank]; 886 last_diag_col = first_diag_col + B->mnls[rank]; 887 for (i=0; i<B->mnls[rank]; i++) { 888 for (k=0; k<B->lines->len[i]; k++) { 889 global_col = B->lines->ptrs[i][k]; 890 if ((global_col >= first_diag_col) && (global_col <= last_diag_col)) 891 d_nnz[i]++; 892 else 893 o_nnz[i]++; 894 } 895 } 896 897 M = N = B->n; 898 /* Here we only know how to create AIJ format */ 899 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 900 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 901 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 902 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 903 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 904 905 for (i=0; i<B->mnls[rank]; i++) { 906 global_row = B->start_indices[rank]+i; 907 for (k=0; k<B->lines->len[i]; k++) { 908 global_col = B->lines->ptrs[i][k]; 909 val = B->lines->A[i][k]; 910 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 911 } 912 } 913 914 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 915 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 916 917 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 918 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 919 920 PetscFunctionReturn(0); 921 } 922 923 /**********************************************************************/ 924 925 /* 926 Converts from an SPAI vector v to a PETSc vec Pv. 927 */ 928 #undef __FUNCT__ 929 #define __FUNCT__ "ConvertVectorToVec" 930 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 931 { 932 PetscErrorCode ierr; 933 int size,rank,m,M,i,*mnls,*start_indices,*global_indices; 934 935 PetscFunctionBegin; 936 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 937 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 938 939 m = v->mnl; 940 M = v->n; 941 942 943 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 944 945 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 946 ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr); 947 948 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 949 start_indices[0] = 0; 950 for (i=1; i<size; i++) 951 start_indices[i] = start_indices[i-1] +mnls[i-1]; 952 953 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 954 for (i=0; i<v->mnl; i++) 955 global_indices[i] = start_indices[rank] + i; 956 957 ierr = PetscFree(mnls);CHKERRQ(ierr); 958 ierr = PetscFree(start_indices);CHKERRQ(ierr); 959 960 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 961 962 ierr = PetscFree(global_indices);CHKERRQ(ierr); 963 964 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 965 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 966 967 PetscFunctionReturn(0); 968 } 969 970 971 972 973