1 #define PETSCKSP_DLL 2 3 /* 4 3/99 Modified by Stephen Barnard to support SPAI version 3.0 5 */ 6 7 /* 8 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 9 Code written by Stephen Barnard. 10 11 Note: there is some BAD memory bleeding below! 12 13 This code needs work 14 15 1) get rid of all memory bleeding 16 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 17 rather than having the sp flag for PC_SPAI 18 3) fix to set the block size based on the matrix block size 19 20 */ 21 22 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 23 #include "petscspai.h" 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include "spai.h" 31 #include "matrix.h" 32 EXTERN_C_END 33 34 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 35 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 36 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 37 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *); 38 39 typedef struct { 40 41 matrix *B; /* matrix in SPAI format */ 42 matrix *BT; /* transpose of matrix in SPAI format */ 43 matrix *M; /* the approximate inverse in SPAI format */ 44 45 Mat PM; /* the approximate inverse PETSc format */ 46 47 double epsilon; /* tolerance */ 48 int nbsteps; /* max number of "improvement" steps per line */ 49 int max; /* max dimensions of is_I, q, etc. */ 50 int maxnew; /* max number of new entries per step */ 51 int block_size; /* constant block size */ 52 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 53 int verbose; /* SPAI prints timing and statistics */ 54 55 int sp; /* symmetric nonzero pattern */ 56 MPI_Comm comm_spai; /* communicator to be used with spai */ 57 } PC_SPAI; 58 59 /**********************************************************************/ 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCSetUp_SPAI" 63 static PetscErrorCode PCSetUp_SPAI(PC pc) 64 { 65 PC_SPAI *ispai = (PC_SPAI*)pc->data; 66 PetscErrorCode ierr; 67 Mat AT; 68 69 PetscFunctionBegin; 70 71 init_SPAI(); 72 73 if (ispai->sp) { 74 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 75 } else { 76 /* Use the transpose to get the column nonzero structure. */ 77 ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr); 78 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 79 ierr = MatDestroy(AT);CHKERRQ(ierr); 80 } 81 82 /* Destroy the transpose */ 83 /* Don't know how to do it. PETSc developers? */ 84 85 /* construct SPAI preconditioner */ 86 /* FILE *messages */ /* file for warning messages */ 87 /* double epsilon */ /* tolerance */ 88 /* int nbsteps */ /* max number of "improvement" steps per line */ 89 /* int max */ /* max dimensions of is_I, q, etc. */ 90 /* int maxnew */ /* max number of new entries per step */ 91 /* int block_size */ /* block_size == 1 specifies scalar elments 92 block_size == n specifies nxn constant-block elements 93 block_size == 0 specifies variable-block elements */ 94 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 95 /* cache_size == 0 indicates no caching */ 96 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 97 verbose == 1 prints timing and matrix statistics */ 98 99 ierr = bspai(ispai->B,&ispai->M, 100 stdout, 101 ispai->epsilon, 102 ispai->nbsteps, 103 ispai->max, 104 ispai->maxnew, 105 ispai->block_size, 106 ispai->cache_size, 107 ispai->verbose); CHKERRQ(ierr); 108 109 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 110 111 /* free the SPAI matrices */ 112 sp_free_matrix(ispai->B); 113 sp_free_matrix(ispai->M); 114 115 PetscFunctionReturn(0); 116 } 117 118 /**********************************************************************/ 119 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCApply_SPAI" 122 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 123 { 124 PC_SPAI *ispai = (PC_SPAI*)pc->data; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 /* Now using PETSc's multiply */ 129 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /**********************************************************************/ 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "PCDestroy_SPAI" 137 static PetscErrorCode PCDestroy_SPAI(PC pc) 138 { 139 PetscErrorCode ierr; 140 PC_SPAI *ispai = (PC_SPAI*)pc->data; 141 142 PetscFunctionBegin; 143 if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);} 144 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 145 ierr = PetscFree(ispai);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 /**********************************************************************/ 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "PCView_SPAI" 153 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 154 { 155 PC_SPAI *ispai = (PC_SPAI*)pc->data; 156 PetscErrorCode ierr; 157 PetscTruth iascii; 158 159 PetscFunctionBegin; 160 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 161 if (iascii) { 162 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 169 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 170 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 171 } 172 PetscFunctionReturn(0); 173 } 174 175 EXTERN_C_BEGIN 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 178 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 179 { 180 PC_SPAI *ispai = (PC_SPAI*)pc->data; 181 PetscFunctionBegin; 182 ispai->epsilon = epsilon1; 183 PetscFunctionReturn(0); 184 } 185 EXTERN_C_END 186 187 /**********************************************************************/ 188 189 EXTERN_C_BEGIN 190 #undef __FUNCT__ 191 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 192 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 193 { 194 PC_SPAI *ispai = (PC_SPAI*)pc->data; 195 PetscFunctionBegin; 196 ispai->nbsteps = nbsteps1; 197 PetscFunctionReturn(0); 198 } 199 EXTERN_C_END 200 201 /**********************************************************************/ 202 203 /* added 1/7/99 g.h. */ 204 EXTERN_C_BEGIN 205 #undef __FUNCT__ 206 #define __FUNCT__ "PCSPAISetMax_SPAI" 207 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1) 208 { 209 PC_SPAI *ispai = (PC_SPAI*)pc->data; 210 PetscFunctionBegin; 211 ispai->max = max1; 212 PetscFunctionReturn(0); 213 } 214 EXTERN_C_END 215 216 /**********************************************************************/ 217 218 EXTERN_C_BEGIN 219 #undef __FUNCT__ 220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 221 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 222 { 223 PC_SPAI *ispai = (PC_SPAI*)pc->data; 224 PetscFunctionBegin; 225 ispai->maxnew = maxnew1; 226 PetscFunctionReturn(0); 227 } 228 EXTERN_C_END 229 230 /**********************************************************************/ 231 232 EXTERN_C_BEGIN 233 #undef __FUNCT__ 234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 235 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 236 { 237 PC_SPAI *ispai = (PC_SPAI*)pc->data; 238 PetscFunctionBegin; 239 ispai->block_size = block_size1; 240 PetscFunctionReturn(0); 241 } 242 EXTERN_C_END 243 244 /**********************************************************************/ 245 246 EXTERN_C_BEGIN 247 #undef __FUNCT__ 248 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 249 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 250 { 251 PC_SPAI *ispai = (PC_SPAI*)pc->data; 252 PetscFunctionBegin; 253 ispai->cache_size = cache_size; 254 PetscFunctionReturn(0); 255 } 256 EXTERN_C_END 257 258 /**********************************************************************/ 259 260 EXTERN_C_BEGIN 261 #undef __FUNCT__ 262 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 263 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose) 264 { 265 PC_SPAI *ispai = (PC_SPAI*)pc->data; 266 PetscFunctionBegin; 267 ispai->verbose = verbose; 268 PetscFunctionReturn(0); 269 } 270 EXTERN_C_END 271 272 /**********************************************************************/ 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "PCSPAISetSp_SPAI" 277 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp) 278 { 279 PC_SPAI *ispai = (PC_SPAI*)pc->data; 280 PetscFunctionBegin; 281 ispai->sp = sp; 282 PetscFunctionReturn(0); 283 } 284 EXTERN_C_END 285 286 /* -------------------------------------------------------------------*/ 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "PCSPAISetEpsilon" 290 /*@ 291 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 292 293 Input Parameters: 294 + pc - the preconditioner 295 - eps - epsilon (default .4) 296 297 Notes: Espilon must be between 0 and 1. It controls the 298 quality of the approximation of M to the inverse of 299 A. Higher values of epsilon lead to more work, more 300 fill, and usually better preconditioners. In many 301 cases the best choice of epsilon is the one that 302 divides the total solution time equally between the 303 preconditioner and the solver. 304 305 Level: intermediate 306 307 .seealso: PCSPAI, PCSetType() 308 @*/ 309 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1) 310 { 311 PetscErrorCode ierr,(*f)(PC,double); 312 PetscFunctionBegin; 313 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr); 314 if (f) { 315 ierr = (*f)(pc,epsilon1);CHKERRQ(ierr); 316 } 317 PetscFunctionReturn(0); 318 } 319 320 /**********************************************************************/ 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "PCSPAISetNBSteps" 324 /*@ 325 PCSPAISetNBSteps - set maximum number of improvement steps per row in 326 the SPAI preconditioner 327 328 Input Parameters: 329 + pc - the preconditioner 330 - n - number of steps (default 5) 331 332 Notes: SPAI constructs to approximation to every column of 333 the exact inverse of A in a series of improvement 334 steps. The quality of the approximation is determined 335 by epsilon. If an approximation achieving an accuracy 336 of epsilon is not obtained after ns steps, SPAI simply 337 uses the best approximation constructed so far. 338 339 Level: intermediate 340 341 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 342 @*/ 343 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1) 344 { 345 PetscErrorCode ierr,(*f)(PC,int); 346 PetscFunctionBegin; 347 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr); 348 if (f) { 349 ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr); 350 } 351 PetscFunctionReturn(0); 352 } 353 354 /**********************************************************************/ 355 356 /* added 1/7/99 g.h. */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCSPAISetMax" 359 /*@ 360 PCSPAISetMax - set the size of various working buffers in 361 the SPAI preconditioner 362 363 Input Parameters: 364 + pc - the preconditioner 365 - n - size (default is 5000) 366 367 Level: intermediate 368 369 .seealso: PCSPAI, PCSetType() 370 @*/ 371 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1) 372 { 373 PetscErrorCode ierr,(*f)(PC,int); 374 PetscFunctionBegin; 375 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr); 376 if (f) { 377 ierr = (*f)(pc,max1);CHKERRQ(ierr); 378 } 379 PetscFunctionReturn(0); 380 } 381 382 /**********************************************************************/ 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "PCSPAISetMaxNew" 386 /*@ 387 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 388 in SPAI preconditioner 389 390 Input Parameters: 391 + pc - the preconditioner 392 - n - maximum number (default 5) 393 394 Level: intermediate 395 396 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 397 @*/ 398 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1) 399 { 400 PetscErrorCode ierr,(*f)(PC,int); 401 PetscFunctionBegin; 402 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr); 403 if (f) { 404 ierr = (*f)(pc,maxnew1);CHKERRQ(ierr); 405 } 406 PetscFunctionReturn(0); 407 } 408 409 /**********************************************************************/ 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "PCSPAISetBlockSize" 413 /*@ 414 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 415 416 Input Parameters: 417 + pc - the preconditioner 418 - n - block size (default 1) 419 420 Notes: A block 421 size of 1 treats A as a matrix of scalar elements. A 422 block size of s > 1 treats A as a matrix of sxs 423 blocks. A block size of 0 treats A as a matrix with 424 variable sized blocks, which are determined by 425 searching for dense square diagonal blocks in A. 426 This can be very effective for finite-element 427 matrices. 428 429 SPAI will convert A to block form, use a block 430 version of the preconditioner algorithm, and then 431 convert the result back to scalar form. 432 433 In many cases the a block-size parameter other than 1 434 can lead to very significant improvement in 435 performance. 436 437 438 Level: intermediate 439 440 .seealso: PCSPAI, PCSetType() 441 @*/ 442 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1) 443 { 444 PetscErrorCode ierr,(*f)(PC,int); 445 PetscFunctionBegin; 446 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr); 447 if (f) { 448 ierr = (*f)(pc,block_size1);CHKERRQ(ierr); 449 } 450 PetscFunctionReturn(0); 451 } 452 453 /**********************************************************************/ 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "PCSPAISetCacheSize" 457 /*@ 458 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 459 460 Input Parameters: 461 + pc - the preconditioner 462 - n - cache size {0,1,2,3,4,5} (default 5) 463 464 Notes: SPAI uses a hash table to cache messages and avoid 465 redundant communication. If suggest always using 466 5. This parameter is irrelevant in the serial 467 version. 468 469 Level: intermediate 470 471 .seealso: PCSPAI, PCSetType() 472 @*/ 473 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size) 474 { 475 PetscErrorCode ierr,(*f)(PC,int); 476 PetscFunctionBegin; 477 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr); 478 if (f) { 479 ierr = (*f)(pc,cache_size);CHKERRQ(ierr); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 /**********************************************************************/ 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "PCSPAISetVerbose" 488 /*@ 489 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 490 491 Input Parameters: 492 + pc - the preconditioner 493 - n - level (default 1) 494 495 Notes: print parameters, timings and matrix statistics 496 497 Level: intermediate 498 499 .seealso: PCSPAI, PCSetType() 500 @*/ 501 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose) 502 { 503 PetscErrorCode ierr,(*f)(PC,int); 504 PetscFunctionBegin; 505 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr); 506 if (f) { 507 ierr = (*f)(pc,verbose);CHKERRQ(ierr); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 /**********************************************************************/ 513 514 #undef __FUNCT__ 515 #define __FUNCT__ "PCSPAISetSp" 516 /*@ 517 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 518 519 Input Parameters: 520 + pc - the preconditioner 521 - n - 0 or 1 522 523 Notes: If A has a symmetric nonzero pattern use -sp 1 to 524 improve performance by eliminating some communication 525 in the parallel version. Even if A does not have a 526 symmetric nonzero pattern -sp 1 may well lead to good 527 results, but the code will not follow the published 528 SPAI algorithm exactly. 529 530 531 Level: intermediate 532 533 .seealso: PCSPAI, PCSetType() 534 @*/ 535 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp) 536 { 537 PetscErrorCode ierr,(*f)(PC,int); 538 PetscFunctionBegin; 539 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr); 540 if (f) { 541 ierr = (*f)(pc,sp);CHKERRQ(ierr); 542 } 543 PetscFunctionReturn(0); 544 } 545 546 /**********************************************************************/ 547 548 /**********************************************************************/ 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "PCSetFromOptions_SPAI" 552 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 553 { 554 PC_SPAI *ispai = (PC_SPAI*)pc->data; 555 PetscErrorCode ierr; 556 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 557 double epsilon1; 558 PetscTruth flg; 559 560 PetscFunctionBegin; 561 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 562 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 563 if (flg) { 564 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 565 } 566 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 567 if (flg) { 568 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 569 } 570 /* added 1/7/99 g.h. */ 571 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 572 if (flg) { 573 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 574 } 575 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 576 if (flg) { 577 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 578 } 579 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 580 if (flg) { 581 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 582 } 583 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 584 if (flg) { 585 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 586 } 587 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 588 if (flg) { 589 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 590 } 591 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 592 if (flg) { 593 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 594 } 595 ierr = PetscOptionsTail();CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 599 /**********************************************************************/ 600 601 /*MC 602 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 603 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 604 605 Options Database Keys: 606 + -pc_spai_set_epsilon <eps> - set tolerance 607 . -pc_spai_nbstep <n> - set nbsteps 608 . -pc_spai_max <m> - set max 609 . -pc_spai_max_new <m> - set maxnew 610 . -pc_spai_block_size <n> - set block size 611 . -pc_spai_cache_size <n> - set cache size 612 . -pc_spai_sp <m> - set sp 613 - -pc_spai_set_verbose <true,false> - verbose output 614 615 Notes: This only works with AIJ matrices. 616 617 Level: beginner 618 619 Concepts: approximate inverse 620 621 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 622 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 623 PCSPAISetVerbose(), PCSPAISetSp() 624 M*/ 625 626 EXTERN_C_BEGIN 627 #undef __FUNCT__ 628 #define __FUNCT__ "PCCreate_SPAI" 629 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc) 630 { 631 PC_SPAI *ispai; 632 PetscErrorCode ierr; 633 634 PetscFunctionBegin; 635 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 636 pc->data = ispai; 637 638 pc->ops->destroy = PCDestroy_SPAI; 639 pc->ops->apply = PCApply_SPAI; 640 pc->ops->applyrichardson = 0; 641 pc->ops->setup = PCSetUp_SPAI; 642 pc->ops->view = PCView_SPAI; 643 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 644 645 ispai->epsilon = .4; 646 ispai->nbsteps = 5; 647 ispai->max = 5000; 648 ispai->maxnew = 5; 649 ispai->block_size = 1; 650 ispai->cache_size = 5; 651 ispai->verbose = 0; 652 653 ispai->sp = 1; 654 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 655 656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 657 "PCSPAISetEpsilon_SPAI", 658 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 660 "PCSPAISetNBSteps_SPAI", 661 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 663 "PCSPAISetMax_SPAI", 664 PCSPAISetMax_SPAI);CHKERRQ(ierr); 665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 666 "PCSPAISetMaxNew_SPAI", 667 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 669 "PCSPAISetBlockSize_SPAI", 670 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 672 "PCSPAISetCacheSize_SPAI", 673 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 674 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 675 "PCSPAISetVerbose_SPAI", 676 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 677 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 678 "PCSPAISetSp_SPAI", 679 PCSPAISetSp_SPAI);CHKERRQ(ierr); 680 681 PetscFunctionReturn(0); 682 } 683 EXTERN_C_END 684 685 /**********************************************************************/ 686 687 /* 688 Converts from a PETSc matrix to an SPAI matrix 689 */ 690 #undef __FUNCT__ 691 #define __FUNCT__ "ConvertMatToMatrix" 692 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 693 { 694 matrix *M; 695 int i,j,col; 696 int row_indx; 697 int len,pe,local_indx,start_indx; 698 int *mapping; 699 PetscErrorCode ierr; 700 const int *cols; 701 const double *vals; 702 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 703 PetscMPIInt size,rank; 704 struct compressed_lines *rows; 705 706 PetscFunctionBegin; 707 708 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 710 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 711 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 712 713 /* 714 not sure why a barrier is required. commenting out 715 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 716 */ 717 718 M = new_matrix((SPAI_Comm)comm); 719 720 M->n = n; 721 M->bs = 1; 722 M->max_block_size = 1; 723 724 M->mnls = (int*)malloc(sizeof(int)*size); 725 M->start_indices = (int*)malloc(sizeof(int)*size); 726 M->pe = (int*)malloc(sizeof(int)*n); 727 M->block_sizes = (int*)malloc(sizeof(int)*n); 728 for (i=0; i<n; i++) M->block_sizes[i] = 1; 729 730 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 731 732 M->start_indices[0] = 0; 733 for (i=1; i<size; i++) { 734 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 735 } 736 737 M->mnl = M->mnls[M->myid]; 738 M->my_start_index = M->start_indices[M->myid]; 739 740 for (i=0; i<size; i++) { 741 start_indx = M->start_indices[i]; 742 for (j=0; j<M->mnls[i]; j++) 743 M->pe[start_indx+j] = i; 744 } 745 746 if (AT) { 747 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 748 } else { 749 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 750 } 751 752 rows = M->lines; 753 754 /* Determine the mapping from global indices to pointers */ 755 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 756 pe = 0; 757 local_indx = 0; 758 for (i=0; i<M->n; i++) { 759 if (local_indx >= M->mnls[pe]) { 760 pe++; 761 local_indx = 0; 762 } 763 mapping[i] = local_indx + M->start_indices[pe]; 764 local_indx++; 765 } 766 767 768 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 769 770 /*********************************************************/ 771 /************** Set up the row structure *****************/ 772 /*********************************************************/ 773 774 /* count number of nonzeros in every row */ 775 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 776 for (i=rstart; i<rend; i++) { 777 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 778 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 779 } 780 781 /* allocate buffers */ 782 len = 0; 783 for (i=0; i<mnl; i++) { 784 if (len < num_ptr[i]) len = num_ptr[i]; 785 } 786 787 for (i=rstart; i<rend; i++) { 788 row_indx = i-rstart; 789 len = num_ptr[row_indx]; 790 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 791 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 792 } 793 794 /* copy the matrix */ 795 for (i=rstart; i<rend; i++) { 796 row_indx = i - rstart; 797 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 798 for (j=0; j<nz; j++) { 799 col = cols[j]; 800 len = rows->len[row_indx]++; 801 rows->ptrs[row_indx][len] = mapping[col]; 802 rows->A[row_indx][len] = vals[j]; 803 } 804 rows->slen[row_indx] = rows->len[row_indx]; 805 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 806 } 807 808 809 /************************************************************/ 810 /************** Set up the column structure *****************/ 811 /*********************************************************/ 812 813 if (AT) { 814 815 /* count number of nonzeros in every column */ 816 for (i=rstart; i<rend; i++) { 817 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 818 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 819 } 820 821 /* allocate buffers */ 822 len = 0; 823 for (i=0; i<mnl; i++) { 824 if (len < num_ptr[i]) len = num_ptr[i]; 825 } 826 827 for (i=rstart; i<rend; i++) { 828 row_indx = i-rstart; 829 len = num_ptr[row_indx]; 830 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 831 } 832 833 /* copy the matrix (i.e., the structure) */ 834 for (i=rstart; i<rend; i++) { 835 row_indx = i - rstart; 836 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 837 for (j=0; j<nz; j++) { 838 col = cols[j]; 839 len = rows->rlen[row_indx]++; 840 rows->rptrs[row_indx][len] = mapping[col]; 841 } 842 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 843 } 844 } 845 846 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 847 ierr = PetscFree(mapping);CHKERRQ(ierr); 848 849 order_pointers(M); 850 M->maxnz = calc_maxnz(M); 851 852 *B = M; 853 854 PetscFunctionReturn(0); 855 } 856 857 /**********************************************************************/ 858 859 /* 860 Converts from an SPAI matrix B to a PETSc matrix PB. 861 This assumes that the the SPAI matrix B is stored in 862 COMPRESSED-ROW format. 863 */ 864 #undef __FUNCT__ 865 #define __FUNCT__ "ConvertMatrixToMat" 866 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 867 { 868 PetscMPIInt size,rank; 869 PetscErrorCode ierr; 870 int m,n,M,N; 871 int d_nz,o_nz; 872 int *d_nnz,*o_nnz; 873 int i,k,global_row,global_col,first_diag_col,last_diag_col; 874 PetscScalar val; 875 876 PetscFunctionBegin; 877 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 878 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 879 880 m = n = B->mnls[rank]; 881 d_nz = o_nz = 0; 882 883 /* Determine preallocation for MatCreateMPIAIJ */ 884 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 885 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 886 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 887 first_diag_col = B->start_indices[rank]; 888 last_diag_col = first_diag_col + B->mnls[rank]; 889 for (i=0; i<B->mnls[rank]; i++) { 890 for (k=0; k<B->lines->len[i]; k++) { 891 global_col = B->lines->ptrs[i][k]; 892 if ((global_col >= first_diag_col) && (global_col <= last_diag_col)) 893 d_nnz[i]++; 894 else 895 o_nnz[i]++; 896 } 897 } 898 899 M = N = B->n; 900 /* Here we only know how to create AIJ format */ 901 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 902 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 903 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 904 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 905 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 906 907 for (i=0; i<B->mnls[rank]; i++) { 908 global_row = B->start_indices[rank]+i; 909 for (k=0; k<B->lines->len[i]; k++) { 910 global_col = B->lines->ptrs[i][k]; 911 val = B->lines->A[i][k]; 912 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 913 } 914 } 915 916 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 917 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 918 919 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 921 922 PetscFunctionReturn(0); 923 } 924 925 /**********************************************************************/ 926 927 /* 928 Converts from an SPAI vector v to a PETSc vec Pv. 929 */ 930 #undef __FUNCT__ 931 #define __FUNCT__ "ConvertVectorToVec" 932 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 933 { 934 PetscErrorCode ierr; 935 PetscMPIInt size,rank; 936 int m,M,i,*mnls,*start_indices,*global_indices; 937 938 PetscFunctionBegin; 939 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 940 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 941 942 m = v->mnl; 943 M = v->n; 944 945 946 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 947 948 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 949 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 950 951 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 952 start_indices[0] = 0; 953 for (i=1; i<size; i++) 954 start_indices[i] = start_indices[i-1] +mnls[i-1]; 955 956 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 957 for (i=0; i<v->mnl; i++) 958 global_indices[i] = start_indices[rank] + i; 959 960 ierr = PetscFree(mnls);CHKERRQ(ierr); 961 ierr = PetscFree(start_indices);CHKERRQ(ierr); 962 963 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 964 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 965 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 966 967 ierr = PetscFree(global_indices);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 972 973 974