1 #define PETSCKSP_DLL 2 3 /* 4 3/99 Modified by Stephen Barnard to support SPAI version 3.0 5 */ 6 7 /* 8 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 9 Code written by Stephen Barnard. 10 11 Note: there is some BAD memory bleeding below! 12 13 This code needs work 14 15 1) get rid of all memory bleeding 16 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 17 rather than having the sp flag for PC_SPAI 18 19 */ 20 21 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 29 #include "spai.h" 30 #include "matrix.h" 31 EXTERN_C_END 32 33 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 34 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 35 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 36 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *); 37 38 typedef struct { 39 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. */ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 /**********************************************************************/ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCSetUp_SPAI" 62 static PetscErrorCode PCSetUp_SPAI(PC pc) 63 { 64 PC_SPAI *ispai = (PC_SPAI*)pc->data; 65 PetscErrorCode ierr; 66 Mat AT; 67 68 PetscFunctionBegin; 69 70 init_SPAI(); 71 72 if (ispai->sp) { 73 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 74 } else { 75 /* Use the transpose to get the column nonzero structure. */ 76 ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr); 77 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 78 ierr = MatDestroy(AT);CHKERRQ(ierr); 79 } 80 81 /* Destroy the transpose */ 82 /* Don't know how to do it. PETSc developers? */ 83 84 /* construct SPAI preconditioner */ 85 /* FILE *messages */ /* file for warning messages */ 86 /* double epsilon */ /* tolerance */ 87 /* int nbsteps */ /* max number of "improvement" steps per line */ 88 /* int max */ /* max dimensions of is_I, q, etc. */ 89 /* int maxnew */ /* max number of new entries per step */ 90 /* int block_size */ /* block_size == 1 specifies scalar elments 91 block_size == n specifies nxn constant-block elements 92 block_size == 0 specifies variable-block elements */ 93 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 94 /* cache_size == 0 indicates no caching */ 95 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 96 verbose == 1 prints timing and matrix statistics */ 97 98 ierr = bspai(ispai->B,&ispai->M, 99 stdout, 100 ispai->epsilon, 101 ispai->nbsteps, 102 ispai->max, 103 ispai->maxnew, 104 ispai->block_size, 105 ispai->cache_size, 106 ispai->verbose); CHKERRQ(ierr); 107 108 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 109 110 /* free the SPAI matrices */ 111 sp_free_matrix(ispai->B); 112 sp_free_matrix(ispai->M); 113 114 PetscFunctionReturn(0); 115 } 116 117 /**********************************************************************/ 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCApply_SPAI" 121 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 122 { 123 PC_SPAI *ispai = (PC_SPAI*)pc->data; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 /* Now using PETSc's multiply */ 128 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /**********************************************************************/ 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "PCDestroy_SPAI" 136 static PetscErrorCode PCDestroy_SPAI(PC pc) 137 { 138 PetscErrorCode ierr; 139 PC_SPAI *ispai = (PC_SPAI*)pc->data; 140 141 PetscFunctionBegin; 142 if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);} 143 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 144 ierr = PetscFree(ispai);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 /**********************************************************************/ 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "PCView_SPAI" 152 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 153 { 154 PC_SPAI *ispai = (PC_SPAI*)pc->data; 155 PetscErrorCode ierr; 156 PetscTruth iascii; 157 158 PetscFunctionBegin; 159 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 160 if (iascii) { 161 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 169 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 170 171 } 172 PetscFunctionReturn(0); 173 } 174 175 EXTERN_C_BEGIN 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 178 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 179 { 180 PC_SPAI *ispai = (PC_SPAI*)pc->data; 181 PetscFunctionBegin; 182 ispai->epsilon = epsilon1; 183 PetscFunctionReturn(0); 184 } 185 EXTERN_C_END 186 187 /**********************************************************************/ 188 189 EXTERN_C_BEGIN 190 #undef __FUNCT__ 191 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 192 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 193 { 194 PC_SPAI *ispai = (PC_SPAI*)pc->data; 195 PetscFunctionBegin; 196 ispai->nbsteps = nbsteps1; 197 PetscFunctionReturn(0); 198 } 199 EXTERN_C_END 200 201 /**********************************************************************/ 202 203 /* added 1/7/99 g.h. */ 204 EXTERN_C_BEGIN 205 #undef __FUNCT__ 206 #define __FUNCT__ "PCSPAISetMax_SPAI" 207 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1) 208 { 209 PC_SPAI *ispai = (PC_SPAI*)pc->data; 210 PetscFunctionBegin; 211 ispai->max = max1; 212 PetscFunctionReturn(0); 213 } 214 EXTERN_C_END 215 216 /**********************************************************************/ 217 218 EXTERN_C_BEGIN 219 #undef __FUNCT__ 220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 221 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 222 { 223 PC_SPAI *ispai = (PC_SPAI*)pc->data; 224 PetscFunctionBegin; 225 ispai->maxnew = maxnew1; 226 PetscFunctionReturn(0); 227 } 228 EXTERN_C_END 229 230 /**********************************************************************/ 231 232 EXTERN_C_BEGIN 233 #undef __FUNCT__ 234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 235 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 236 { 237 PC_SPAI *ispai = (PC_SPAI*)pc->data; 238 PetscFunctionBegin; 239 ispai->block_size = block_size1; 240 PetscFunctionReturn(0); 241 } 242 EXTERN_C_END 243 244 /**********************************************************************/ 245 246 EXTERN_C_BEGIN 247 #undef __FUNCT__ 248 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 249 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 250 { 251 PC_SPAI *ispai = (PC_SPAI*)pc->data; 252 PetscFunctionBegin; 253 ispai->cache_size = cache_size; 254 PetscFunctionReturn(0); 255 } 256 EXTERN_C_END 257 258 /**********************************************************************/ 259 260 EXTERN_C_BEGIN 261 #undef __FUNCT__ 262 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 263 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose) 264 { 265 PC_SPAI *ispai = (PC_SPAI*)pc->data; 266 PetscFunctionBegin; 267 ispai->verbose = verbose; 268 PetscFunctionReturn(0); 269 } 270 EXTERN_C_END 271 272 /**********************************************************************/ 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "PCSPAISetSp_SPAI" 277 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp) 278 { 279 PC_SPAI *ispai = (PC_SPAI*)pc->data; 280 PetscFunctionBegin; 281 ispai->sp = sp; 282 PetscFunctionReturn(0); 283 } 284 EXTERN_C_END 285 286 /* -------------------------------------------------------------------*/ 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "PCSPAISetEpsilon" 290 /*@ 291 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 292 293 Input Parameters: 294 + pc - the preconditioner 295 - eps - epsilon (default .4) 296 297 Notes: Espilon must be between 0 and 1. It controls the 298 quality of the approximation of M to the inverse of 299 A. Higher values of epsilon lead to more work, more 300 fill, and usually better preconditioners. In many 301 cases the best choice of epsilon is the one that 302 divides the total solution time equally between the 303 preconditioner and the solver. 304 305 Level: intermediate 306 307 .seealso: PCSPAI, PCSetType() 308 @*/ 309 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1) 310 { 311 PetscErrorCode ierr,(*f)(PC,double); 312 PetscFunctionBegin; 313 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr); 314 if (f) { 315 ierr = (*f)(pc,epsilon1);CHKERRQ(ierr); 316 } 317 PetscFunctionReturn(0); 318 } 319 320 /**********************************************************************/ 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "PCSPAISetNBSteps" 324 /*@ 325 PCSPAISetNBSteps - set maximum number of improvement steps per row in 326 the SPAI preconditioner 327 328 Input Parameters: 329 + pc - the preconditioner 330 - n - number of steps (default 5) 331 332 Notes: SPAI constructs to approximation to every column of 333 the exact inverse of A in a series of improvement 334 steps. The quality of the approximation is determined 335 by epsilon. If an approximation achieving an accuracy 336 of epsilon is not obtained after ns steps, SPAI simply 337 uses the best approximation constructed so far. 338 339 Level: intermediate 340 341 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 342 @*/ 343 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1) 344 { 345 PetscErrorCode ierr,(*f)(PC,int); 346 PetscFunctionBegin; 347 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr); 348 if (f) { 349 ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr); 350 } 351 PetscFunctionReturn(0); 352 } 353 354 /**********************************************************************/ 355 356 /* added 1/7/99 g.h. */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCSPAISetMax" 359 /*@ 360 PCSPAISetMax - set the size of various working buffers in 361 the SPAI preconditioner 362 363 Input Parameters: 364 + pc - the preconditioner 365 - n - size (default is 5000) 366 367 Level: intermediate 368 369 .seealso: PCSPAI, PCSetType() 370 @*/ 371 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1) 372 { 373 PetscErrorCode ierr,(*f)(PC,int); 374 PetscFunctionBegin; 375 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr); 376 if (f) { 377 ierr = (*f)(pc,max1);CHKERRQ(ierr); 378 } 379 PetscFunctionReturn(0); 380 } 381 382 /**********************************************************************/ 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "PCSPAISetMaxNew" 386 /*@ 387 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 388 in SPAI preconditioner 389 390 Input Parameters: 391 + pc - the preconditioner 392 - n - maximum number (default 5) 393 394 Level: intermediate 395 396 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 397 @*/ 398 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1) 399 { 400 PetscErrorCode ierr,(*f)(PC,int); 401 PetscFunctionBegin; 402 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr); 403 if (f) { 404 ierr = (*f)(pc,maxnew1);CHKERRQ(ierr); 405 } 406 PetscFunctionReturn(0); 407 } 408 409 /**********************************************************************/ 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "PCSPAISetBlockSize" 413 /*@ 414 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 415 416 Input Parameters: 417 + pc - the preconditioner 418 - n - block size (default 1) 419 420 Notes: A block 421 size of 1 treats A as a matrix of scalar elements. A 422 block size of s > 1 treats A as a matrix of sxs 423 blocks. A block size of 0 treats A as a matrix with 424 variable sized blocks, which are determined by 425 searching for dense square diagonal blocks in A. 426 This can be very effective for finite-element 427 matrices. 428 429 SPAI will convert A to block form, use a block 430 version of the preconditioner algorithm, and then 431 convert the result back to scalar form. 432 433 In many cases the a block-size parameter other than 1 434 can lead to very significant improvement in 435 performance. 436 437 438 Level: intermediate 439 440 .seealso: PCSPAI, PCSetType() 441 @*/ 442 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1) 443 { 444 PetscErrorCode ierr,(*f)(PC,int); 445 PetscFunctionBegin; 446 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr); 447 if (f) { 448 ierr = (*f)(pc,block_size1);CHKERRQ(ierr); 449 } 450 PetscFunctionReturn(0); 451 } 452 453 /**********************************************************************/ 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "PCSPAISetCacheSize" 457 /*@ 458 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 459 460 Input Parameters: 461 + pc - the preconditioner 462 - n - cache size {0,1,2,3,4,5} (default 5) 463 464 Notes: SPAI uses a hash table to cache messages and avoid 465 redundant communication. If suggest always using 466 5. This parameter is irrelevant in the serial 467 version. 468 469 Level: intermediate 470 471 .seealso: PCSPAI, PCSetType() 472 @*/ 473 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size) 474 { 475 PetscErrorCode ierr,(*f)(PC,int); 476 PetscFunctionBegin; 477 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr); 478 if (f) { 479 ierr = (*f)(pc,cache_size);CHKERRQ(ierr); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 /**********************************************************************/ 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "PCSPAISetVerbose" 488 /*@ 489 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 490 491 Input Parameters: 492 + pc - the preconditioner 493 - n - level (default 1) 494 495 Notes: print parameters, timings and matrix statistics 496 497 Level: intermediate 498 499 .seealso: PCSPAI, PCSetType() 500 @*/ 501 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose) 502 { 503 PetscErrorCode ierr,(*f)(PC,int); 504 PetscFunctionBegin; 505 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr); 506 if (f) { 507 ierr = (*f)(pc,verbose);CHKERRQ(ierr); 508 } 509 PetscFunctionReturn(0); 510 } 511 512 /**********************************************************************/ 513 514 #undef __FUNCT__ 515 #define __FUNCT__ "PCSPAISetSp" 516 /*@ 517 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 518 519 Input Parameters: 520 + pc - the preconditioner 521 - n - 0 or 1 522 523 Notes: If A has a symmetric nonzero pattern use -sp 1 to 524 improve performance by eliminating some communication 525 in the parallel version. Even if A does not have a 526 symmetric nonzero pattern -sp 1 may well lead to good 527 results, but the code will not follow the published 528 SPAI algorithm exactly. 529 530 531 Level: intermediate 532 533 .seealso: PCSPAI, PCSetType() 534 @*/ 535 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp) 536 { 537 PetscErrorCode ierr,(*f)(PC,int); 538 PetscFunctionBegin; 539 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr); 540 if (f) { 541 ierr = (*f)(pc,sp);CHKERRQ(ierr); 542 } 543 PetscFunctionReturn(0); 544 } 545 546 /**********************************************************************/ 547 548 /**********************************************************************/ 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "PCSetFromOptions_SPAI" 552 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 553 { 554 PC_SPAI *ispai = (PC_SPAI*)pc->data; 555 PetscErrorCode ierr; 556 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 557 double epsilon1; 558 PetscTruth flg; 559 560 PetscFunctionBegin; 561 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 562 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 563 if (flg) { 564 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 565 } 566 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 567 if (flg) { 568 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 569 } 570 /* added 1/7/99 g.h. */ 571 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 572 if (flg) { 573 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 574 } 575 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 576 if (flg) { 577 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 578 } 579 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 580 if (flg) { 581 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 582 } 583 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 584 if (flg) { 585 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 586 } 587 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 588 if (flg) { 589 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 590 } 591 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 592 if (flg) { 593 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 594 } 595 ierr = PetscOptionsTail();CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 599 /**********************************************************************/ 600 601 /*MC 602 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 603 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 604 605 Options Database Keys: 606 + -pc_spai_set_epsilon <eps> - set tolerance 607 . -pc_spai_nbstep <n> - set nbsteps 608 . -pc_spai_max <m> - set max 609 . -pc_spai_max_new <m> - set maxnew 610 . -pc_spai_block_size <n> - set block size 611 . -pc_spai_cache_size <n> - set cache size 612 . -pc_spai_sp <m> - set sp 613 - -pc_spai_set_verbose <true,false> - verbose output 614 615 Notes: This only works with AIJ matrices. 616 617 Level: beginner 618 619 Concepts: approximate inverse 620 621 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 622 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 623 PCSPAISetVerbose(), PCSPAISetSp() 624 M*/ 625 626 EXTERN_C_BEGIN 627 #undef __FUNCT__ 628 #define __FUNCT__ "PCCreate_SPAI" 629 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc) 630 { 631 PC_SPAI *ispai; 632 PetscErrorCode ierr; 633 634 PetscFunctionBegin; 635 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 636 pc->data = (void*)ispai; 637 638 pc->ops->destroy = PCDestroy_SPAI; 639 pc->ops->apply = PCApply_SPAI; 640 pc->ops->applyrichardson = 0; 641 pc->ops->setup = PCSetUp_SPAI; 642 pc->ops->view = PCView_SPAI; 643 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 644 645 ispai->epsilon = .4; 646 ispai->nbsteps = 5; 647 ispai->max = 5000; 648 ispai->maxnew = 5; 649 ispai->block_size = 1; 650 ispai->cache_size = 5; 651 ispai->verbose = 0; 652 653 ispai->sp = 1; 654 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 655 656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 657 "PCSPAISetEpsilon_SPAI", 658 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 660 "PCSPAISetNBSteps_SPAI", 661 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 663 "PCSPAISetMax_SPAI", 664 PCSPAISetMax_SPAI);CHKERRQ(ierr); 665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 666 "PCSPAISetMaxNew_SPAI", 667 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 669 "PCSPAISetBlockSize_SPAI", 670 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 672 "PCSPAISetCacheSize_SPAI", 673 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 674 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 675 "PCSPAISetVerbose_SPAI", 676 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 677 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 678 "PCSPAISetSp_SPAI", 679 PCSPAISetSp_SPAI);CHKERRQ(ierr); 680 681 PetscFunctionReturn(0); 682 } 683 EXTERN_C_END 684 685 /**********************************************************************/ 686 687 /* 688 Converts from a PETSc matrix to an SPAI matrix 689 */ 690 #undef __FUNCT__ 691 #define __FUNCT__ "ConvertMatToMatrix" 692 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 693 { 694 matrix *M; 695 int i,j,col; 696 int row_indx; 697 int len,pe,local_indx,start_indx; 698 int *mapping; 699 PetscErrorCode ierr; 700 const int *cols; 701 const double *vals; 702 int *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend; 703 struct compressed_lines *rows; 704 705 PetscFunctionBegin; 706 707 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 708 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 709 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 710 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 711 712 /* 713 not sure why a barrier is required. commenting out 714 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 715 */ 716 717 M = new_matrix((SPAI_Comm)comm); 718 719 M->n = n; 720 M->bs = 1; 721 M->max_block_size = 1; 722 723 M->mnls = (int*)malloc(sizeof(int)*size); 724 M->start_indices = (int*)malloc(sizeof(int)*size); 725 M->pe = (int*)malloc(sizeof(int)*n); 726 M->block_sizes = (int*)malloc(sizeof(int)*n); 727 for (i=0; i<n; i++) M->block_sizes[i] = 1; 728 729 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 730 731 M->start_indices[0] = 0; 732 for (i=1; i<size; i++) { 733 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 734 } 735 736 M->mnl = M->mnls[M->myid]; 737 M->my_start_index = M->start_indices[M->myid]; 738 739 for (i=0; i<size; i++) { 740 start_indx = M->start_indices[i]; 741 for (j=0; j<M->mnls[i]; j++) 742 M->pe[start_indx+j] = i; 743 } 744 745 if (AT) { 746 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 747 } else { 748 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 749 } 750 751 rows = M->lines; 752 753 /* Determine the mapping from global indices to pointers */ 754 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 755 pe = 0; 756 local_indx = 0; 757 for (i=0; i<M->n; i++) { 758 if (local_indx >= M->mnls[pe]) { 759 pe++; 760 local_indx = 0; 761 } 762 mapping[i] = local_indx + M->start_indices[pe]; 763 local_indx++; 764 } 765 766 767 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 768 769 /*********************************************************/ 770 /************** Set up the row structure *****************/ 771 /*********************************************************/ 772 773 /* count number of nonzeros in every row */ 774 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 775 for (i=rstart; i<rend; i++) { 776 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 777 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 778 } 779 780 /* allocate buffers */ 781 len = 0; 782 for (i=0; i<mnl; i++) { 783 if (len < num_ptr[i]) len = num_ptr[i]; 784 } 785 786 for (i=rstart; i<rend; i++) { 787 row_indx = i-rstart; 788 len = num_ptr[row_indx]; 789 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 790 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 791 } 792 793 /* copy the matrix */ 794 for (i=rstart; i<rend; i++) { 795 row_indx = i - rstart; 796 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 797 for (j=0; j<nz; j++) { 798 col = cols[j]; 799 len = rows->len[row_indx]++; 800 rows->ptrs[row_indx][len] = mapping[col]; 801 rows->A[row_indx][len] = vals[j]; 802 } 803 rows->slen[row_indx] = rows->len[row_indx]; 804 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 805 } 806 807 808 /************************************************************/ 809 /************** Set up the column structure *****************/ 810 /*********************************************************/ 811 812 if (AT) { 813 814 /* count number of nonzeros in every column */ 815 for (i=rstart; i<rend; i++) { 816 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 817 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 818 } 819 820 /* allocate buffers */ 821 len = 0; 822 for (i=0; i<mnl; i++) { 823 if (len < num_ptr[i]) len = num_ptr[i]; 824 } 825 826 for (i=rstart; i<rend; i++) { 827 row_indx = i-rstart; 828 len = num_ptr[row_indx]; 829 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 830 } 831 832 /* copy the matrix (i.e., the structure) */ 833 for (i=rstart; i<rend; i++) { 834 row_indx = i - rstart; 835 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 836 for (j=0; j<nz; j++) { 837 col = cols[j]; 838 len = rows->rlen[row_indx]++; 839 rows->rptrs[row_indx][len] = mapping[col]; 840 } 841 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 842 } 843 } 844 845 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 846 ierr = PetscFree(mapping);CHKERRQ(ierr); 847 848 order_pointers(M); 849 M->maxnz = calc_maxnz(M); 850 851 *B = M; 852 853 PetscFunctionReturn(0); 854 } 855 856 /**********************************************************************/ 857 858 /* 859 Converts from an SPAI matrix B to a PETSc matrix PB. 860 This assumes that the the SPAI matrix B is stored in 861 COMPRESSED-ROW format. 862 */ 863 #undef __FUNCT__ 864 #define __FUNCT__ "ConvertMatrixToMat" 865 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 866 { 867 int size,rank; 868 PetscErrorCode ierr; 869 int m,n,M,N; 870 int d_nz,o_nz; 871 int *d_nnz,*o_nnz; 872 int i,k,global_row,global_col,first_diag_col,last_diag_col; 873 PetscScalar val; 874 875 PetscFunctionBegin; 876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 878 879 m = n = B->mnls[rank]; 880 d_nz = o_nz = 0; 881 882 /* Determine preallocation for MatCreateMPIAIJ */ 883 ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr); 884 ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr); 885 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 886 first_diag_col = B->start_indices[rank]; 887 last_diag_col = first_diag_col + B->mnls[rank]; 888 for (i=0; i<B->mnls[rank]; i++) { 889 for (k=0; k<B->lines->len[i]; k++) { 890 global_col = B->lines->ptrs[i][k]; 891 if ((global_col >= first_diag_col) && (global_col <= last_diag_col)) 892 d_nnz[i]++; 893 else 894 o_nnz[i]++; 895 } 896 } 897 898 M = N = B->n; 899 /* Here we only know how to create AIJ format */ 900 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 901 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 902 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 903 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 904 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 905 906 for (i=0; i<B->mnls[rank]; i++) { 907 global_row = B->start_indices[rank]+i; 908 for (k=0; k<B->lines->len[i]; k++) { 909 global_col = B->lines->ptrs[i][k]; 910 val = B->lines->A[i][k]; 911 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 912 } 913 } 914 915 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 916 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 917 918 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 921 PetscFunctionReturn(0); 922 } 923 924 /**********************************************************************/ 925 926 /* 927 Converts from an SPAI vector v to a PETSc vec Pv. 928 */ 929 #undef __FUNCT__ 930 #define __FUNCT__ "ConvertVectorToVec" 931 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 932 { 933 PetscErrorCode ierr; 934 int size,rank,m,M,i,*mnls,*start_indices,*global_indices; 935 936 PetscFunctionBegin; 937 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 938 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 939 940 m = v->mnl; 941 M = v->n; 942 943 944 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 945 946 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 947 ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr); 948 949 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 950 start_indices[0] = 0; 951 for (i=1; i<size; i++) 952 start_indices[i] = start_indices[i-1] +mnls[i-1]; 953 954 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 955 for (i=0; i<v->mnl; i++) 956 global_indices[i] = start_indices[rank] + i; 957 958 ierr = PetscFree(mnls);CHKERRQ(ierr); 959 ierr = PetscFree(start_indices);CHKERRQ(ierr); 960 961 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 962 963 ierr = PetscFree(global_indices);CHKERRQ(ierr); 964 965 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 966 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 967 968 PetscFunctionReturn(0); 969 } 970 971 972 973 974