1 #define PETSCKSP_DLL 2 3 /* 4 3/99 Modified by Stephen Barnard to support SPAI version 3.0 5 */ 6 7 /* 8 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 9 Code written by Stephen Barnard. 10 11 Note: there is some BAD memory bleeding below! 12 13 This code needs work 14 15 1) get rid of all memory bleeding 16 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 17 rather than having the sp flag for PC_SPAI 18 19 */ 20 21 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #include "spai.h" 29 #include "matrix.h" 30 EXTERN_C_END 31 32 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 33 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 34 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 35 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *); 36 37 typedef struct { 38 39 matrix *B; /* matrix in SPAI format */ 40 matrix *BT; /* transpose of matrix in SPAI format */ 41 matrix *M; /* the approximate inverse in SPAI format */ 42 43 Mat PM; /* the approximate inverse PETSc format */ 44 45 double epsilon; /* tolerance */ 46 int nbsteps; /* max number of "improvement" steps per line */ 47 int max; /* max dimensions of is_I, q, etc. */ 48 int maxnew; /* max number of new entries per step */ 49 int block_size; /* constant block size */ 50 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 51 int verbose; /* SPAI prints timing and statistics */ 52 53 int sp; /* symmetric nonzero pattern */ 54 MPI_Comm comm_spai; /* communicator to be used with spai */ 55 } PC_SPAI; 56 57 /**********************************************************************/ 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCSetUp_SPAI" 61 static PetscErrorCode PCSetUp_SPAI(PC pc) 62 { 63 PC_SPAI *ispai = (PC_SPAI*)pc->data; 64 PetscErrorCode ierr; 65 Mat AT; 66 67 PetscFunctionBegin; 68 69 init_SPAI(); 70 71 if (ispai->sp) { 72 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 73 } else { 74 /* Use the transpose to get the column nonzero structure. */ 75 ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr); 76 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 77 ierr = MatDestroy(AT);CHKERRQ(ierr); 78 } 79 80 /* Destroy the transpose */ 81 /* Don't know how to do it. PETSc developers? */ 82 83 /* construct SPAI preconditioner */ 84 /* FILE *messages */ /* file for warning messages */ 85 /* double epsilon */ /* tolerance */ 86 /* int nbsteps */ /* max number of "improvement" steps per line */ 87 /* int max */ /* max dimensions of is_I, q, etc. */ 88 /* int maxnew */ /* max number of new entries per step */ 89 /* int block_size */ /* block_size == 1 specifies scalar elments 90 block_size == n specifies nxn constant-block elements 91 block_size == 0 specifies variable-block elements */ 92 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 93 /* cache_size == 0 indicates no caching */ 94 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 95 verbose == 1 prints timing and matrix statistics */ 96 97 ierr = bspai(ispai->B,&ispai->M, 98 stdout, 99 ispai->epsilon, 100 ispai->nbsteps, 101 ispai->max, 102 ispai->maxnew, 103 ispai->block_size, 104 ispai->cache_size, 105 ispai->verbose); CHKERRQ(ierr); 106 107 ierr = ConvertMatrixToMat(pc->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 108 109 /* free the SPAI matrices */ 110 sp_free_matrix(ispai->B); 111 sp_free_matrix(ispai->M); 112 113 PetscFunctionReturn(0); 114 } 115 116 /**********************************************************************/ 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCApply_SPAI" 120 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 121 { 122 PC_SPAI *ispai = (PC_SPAI*)pc->data; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 /* Now using PETSc's multiply */ 127 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 /**********************************************************************/ 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "PCDestroy_SPAI" 135 static PetscErrorCode PCDestroy_SPAI(PC pc) 136 { 137 PetscErrorCode ierr; 138 PC_SPAI *ispai = (PC_SPAI*)pc->data; 139 140 PetscFunctionBegin; 141 if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);} 142 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 143 ierr = PetscFree(ispai);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 /**********************************************************************/ 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "PCView_SPAI" 151 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 152 { 153 PC_SPAI *ispai = (PC_SPAI*)pc->data; 154 PetscErrorCode ierr; 155 PetscTruth iascii; 156 157 PetscFunctionBegin; 158 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 159 if (iascii) { 160 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 169 170 } 171 PetscFunctionReturn(0); 172 } 173 174 EXTERN_C_BEGIN 175 #undef __FUNCT__ 176 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 177 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 178 { 179 PC_SPAI *ispai = (PC_SPAI*)pc->data; 180 PetscFunctionBegin; 181 ispai->epsilon = epsilon1; 182 PetscFunctionReturn(0); 183 } 184 EXTERN_C_END 185 186 /**********************************************************************/ 187 188 EXTERN_C_BEGIN 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 191 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 192 { 193 PC_SPAI *ispai = (PC_SPAI*)pc->data; 194 PetscFunctionBegin; 195 ispai->nbsteps = nbsteps1; 196 PetscFunctionReturn(0); 197 } 198 EXTERN_C_END 199 200 /**********************************************************************/ 201 202 /* added 1/7/99 g.h. */ 203 EXTERN_C_BEGIN 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCSPAISetMax_SPAI" 206 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1) 207 { 208 PC_SPAI *ispai = (PC_SPAI*)pc->data; 209 PetscFunctionBegin; 210 ispai->max = max1; 211 PetscFunctionReturn(0); 212 } 213 EXTERN_C_END 214 215 /**********************************************************************/ 216 217 EXTERN_C_BEGIN 218 #undef __FUNCT__ 219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 220 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 221 { 222 PC_SPAI *ispai = (PC_SPAI*)pc->data; 223 PetscFunctionBegin; 224 ispai->maxnew = maxnew1; 225 PetscFunctionReturn(0); 226 } 227 EXTERN_C_END 228 229 /**********************************************************************/ 230 231 EXTERN_C_BEGIN 232 #undef __FUNCT__ 233 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 234 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 235 { 236 PC_SPAI *ispai = (PC_SPAI*)pc->data; 237 PetscFunctionBegin; 238 ispai->block_size = block_size1; 239 PetscFunctionReturn(0); 240 } 241 EXTERN_C_END 242 243 /**********************************************************************/ 244 245 EXTERN_C_BEGIN 246 #undef __FUNCT__ 247 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 248 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 249 { 250 PC_SPAI *ispai = (PC_SPAI*)pc->data; 251 PetscFunctionBegin; 252 ispai->cache_size = cache_size; 253 PetscFunctionReturn(0); 254 } 255 EXTERN_C_END 256 257 /**********************************************************************/ 258 259 EXTERN_C_BEGIN 260 #undef __FUNCT__ 261 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 262 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose) 263 { 264 PC_SPAI *ispai = (PC_SPAI*)pc->data; 265 PetscFunctionBegin; 266 ispai->verbose = verbose; 267 PetscFunctionReturn(0); 268 } 269 EXTERN_C_END 270 271 /**********************************************************************/ 272 273 EXTERN_C_BEGIN 274 #undef __FUNCT__ 275 #define __FUNCT__ "PCSPAISetSp_SPAI" 276 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp) 277 { 278 PC_SPAI *ispai = (PC_SPAI*)pc->data; 279 PetscFunctionBegin; 280 ispai->sp = sp; 281 PetscFunctionReturn(0); 282 } 283 EXTERN_C_END 284 285 /* -------------------------------------------------------------------*/ 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "PCSPAISetEpsilon" 289 /*@ 290 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 291 292 Input Parameters: 293 + pc - the preconditioner 294 - eps - epsilon (default .4) 295 296 Notes: Espilon must be between 0 and 1. It controls the 297 quality of the approximation of M to the inverse of 298 A. Higher values of epsilon lead to more work, more 299 fill, and usually better preconditioners. In many 300 cases the best choice of epsilon is the one that 301 divides the total solution time equally between the 302 preconditioner and the solver. 303 304 Level: intermediate 305 306 .seealso: PCSPAI, PCSetType() 307 @*/ 308 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1) 309 { 310 PetscErrorCode ierr,(*f)(PC,double); 311 PetscFunctionBegin; 312 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr); 313 if (f) { 314 ierr = (*f)(pc,epsilon1);CHKERRQ(ierr); 315 } 316 PetscFunctionReturn(0); 317 } 318 319 /**********************************************************************/ 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "PCSPAISetNBSteps" 323 /*@ 324 PCSPAISetNBSteps - set maximum number of improvement steps per row in 325 the SPAI preconditioner 326 327 Input Parameters: 328 + pc - the preconditioner 329 - n - number of steps (default 5) 330 331 Notes: SPAI constructs to approximation to every column of 332 the exact inverse of A in a series of improvement 333 steps. The quality of the approximation is determined 334 by epsilon. If an approximation achieving an accuracy 335 of epsilon is not obtained after ns steps, SPAI simply 336 uses the best approximation constructed so far. 337 338 Level: intermediate 339 340 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 341 @*/ 342 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1) 343 { 344 PetscErrorCode ierr,(*f)(PC,int); 345 PetscFunctionBegin; 346 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr); 347 if (f) { 348 ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr); 349 } 350 PetscFunctionReturn(0); 351 } 352 353 /**********************************************************************/ 354 355 /* added 1/7/99 g.h. */ 356 #undef __FUNCT__ 357 #define __FUNCT__ "PCSPAISetMax" 358 /*@ 359 PCSPAISetMax - set the size of various working buffers in 360 the SPAI preconditioner 361 362 Input Parameters: 363 + pc - the preconditioner 364 - n - size (default is 5000) 365 366 Level: intermediate 367 368 .seealso: PCSPAI, PCSetType() 369 @*/ 370 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1) 371 { 372 PetscErrorCode ierr,(*f)(PC,int); 373 PetscFunctionBegin; 374 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr); 375 if (f) { 376 ierr = (*f)(pc,max1);CHKERRQ(ierr); 377 } 378 PetscFunctionReturn(0); 379 } 380 381 /**********************************************************************/ 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "PCSPAISetMaxNew" 385 /*@ 386 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 387 in SPAI preconditioner 388 389 Input Parameters: 390 + pc - the preconditioner 391 - n - maximum number (default 5) 392 393 Level: intermediate 394 395 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 396 @*/ 397 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1) 398 { 399 PetscErrorCode ierr,(*f)(PC,int); 400 PetscFunctionBegin; 401 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr); 402 if (f) { 403 ierr = (*f)(pc,maxnew1);CHKERRQ(ierr); 404 } 405 PetscFunctionReturn(0); 406 } 407 408 /**********************************************************************/ 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCSPAISetBlockSize" 412 /*@ 413 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 414 415 Input Parameters: 416 + pc - the preconditioner 417 - n - block size (default 1) 418 419 Notes: A block 420 size of 1 treats A as a matrix of scalar elements. A 421 block size of s > 1 treats A as a matrix of sxs 422 blocks. A block size of 0 treats A as a matrix with 423 variable sized blocks, which are determined by 424 searching for dense square diagonal blocks in A. 425 This can be very effective for finite-element 426 matrices. 427 428 SPAI will convert A to block form, use a block 429 version of the preconditioner algorithm, and then 430 convert the result back to scalar form. 431 432 In many cases the a block-size parameter other than 1 433 can lead to very significant improvement in 434 performance. 435 436 437 Level: intermediate 438 439 .seealso: PCSPAI, PCSetType() 440 @*/ 441 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1) 442 { 443 PetscErrorCode ierr,(*f)(PC,int); 444 PetscFunctionBegin; 445 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr); 446 if (f) { 447 ierr = (*f)(pc,block_size1);CHKERRQ(ierr); 448 } 449 PetscFunctionReturn(0); 450 } 451 452 /**********************************************************************/ 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "PCSPAISetCacheSize" 456 /*@ 457 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 458 459 Input Parameters: 460 + pc - the preconditioner 461 - n - cache size {0,1,2,3,4,5} (default 5) 462 463 Notes: SPAI uses a hash table to cache messages and avoid 464 redundant communication. If suggest always using 465 5. This parameter is irrelevant in the serial 466 version. 467 468 Level: intermediate 469 470 .seealso: PCSPAI, PCSetType() 471 @*/ 472 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size) 473 { 474 PetscErrorCode ierr,(*f)(PC,int); 475 PetscFunctionBegin; 476 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr); 477 if (f) { 478 ierr = (*f)(pc,cache_size);CHKERRQ(ierr); 479 } 480 PetscFunctionReturn(0); 481 } 482 483 /**********************************************************************/ 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCSPAISetVerbose" 487 /*@ 488 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 489 490 Input Parameters: 491 + pc - the preconditioner 492 - n - level (default 1) 493 494 Notes: print parameters, timings and matrix statistics 495 496 Level: intermediate 497 498 .seealso: PCSPAI, PCSetType() 499 @*/ 500 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose) 501 { 502 PetscErrorCode ierr,(*f)(PC,int); 503 PetscFunctionBegin; 504 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr); 505 if (f) { 506 ierr = (*f)(pc,verbose);CHKERRQ(ierr); 507 } 508 PetscFunctionReturn(0); 509 } 510 511 /**********************************************************************/ 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "PCSPAISetSp" 515 /*@ 516 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 517 518 Input Parameters: 519 + pc - the preconditioner 520 - n - 0 or 1 521 522 Notes: If A has a symmetric nonzero pattern use -sp 1 to 523 improve performance by eliminating some communication 524 in the parallel version. Even if A does not have a 525 symmetric nonzero pattern -sp 1 may well lead to good 526 results, but the code will not follow the published 527 SPAI algorithm exactly. 528 529 530 Level: intermediate 531 532 .seealso: PCSPAI, PCSetType() 533 @*/ 534 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp) 535 { 536 PetscErrorCode ierr,(*f)(PC,int); 537 PetscFunctionBegin; 538 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr); 539 if (f) { 540 ierr = (*f)(pc,sp);CHKERRQ(ierr); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 /**********************************************************************/ 546 547 /**********************************************************************/ 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "PCSetFromOptions_SPAI" 551 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 552 { 553 PC_SPAI *ispai = (PC_SPAI*)pc->data; 554 PetscErrorCode ierr; 555 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 556 double epsilon1; 557 PetscTruth flg; 558 559 PetscFunctionBegin; 560 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 561 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 562 if (flg) { 563 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 564 } 565 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 566 if (flg) { 567 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 568 } 569 /* added 1/7/99 g.h. */ 570 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 571 if (flg) { 572 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 573 } 574 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 575 if (flg) { 576 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 577 } 578 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 579 if (flg) { 580 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 581 } 582 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 583 if (flg) { 584 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 585 } 586 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 587 if (flg) { 588 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 589 } 590 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 591 if (flg) { 592 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 593 } 594 ierr = PetscOptionsTail();CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 /**********************************************************************/ 599 600 /*MC 601 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 602 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 603 604 Options Database Keys: 605 + -pc_spai_set_epsilon <eps> - set tolerance 606 . -pc_spai_nbstep <n> - set nbsteps 607 . -pc_spai_max <m> - set max 608 . -pc_spai_max_new <m> - set maxnew 609 . -pc_spai_block_size <n> - set block size 610 . -pc_spai_cache_size <n> - set cache size 611 . -pc_spai_sp <m> - set sp 612 - -pc_spai_set_verbose <true,false> - verbose output 613 614 Notes: This only works with AIJ matrices. 615 616 Level: beginner 617 618 Concepts: approximate inverse 619 620 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 621 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 622 PCSPAISetVerbose(), PCSPAISetSp() 623 M*/ 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "PCCreate_SPAI" 628 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc) 629 { 630 PC_SPAI *ispai; 631 PetscErrorCode ierr; 632 633 PetscFunctionBegin; 634 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 635 pc->data = (void*)ispai; 636 637 pc->ops->destroy = PCDestroy_SPAI; 638 pc->ops->apply = PCApply_SPAI; 639 pc->ops->applyrichardson = 0; 640 pc->ops->setup = PCSetUp_SPAI; 641 pc->ops->view = PCView_SPAI; 642 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 643 644 pc->name = 0; 645 ispai->epsilon = .4; 646 ispai->nbsteps = 5; 647 ispai->max = 5000; 648 ispai->maxnew = 5; 649 ispai->block_size = 1; 650 ispai->cache_size = 5; 651 ispai->verbose = 0; 652 653 ispai->sp = 1; 654 ierr = MPI_Comm_dup(pc->comm,&(ispai->comm_spai));CHKERRQ(ierr); 655 656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 657 "PCSPAISetEpsilon_SPAI", 658 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 660 "PCSPAISetNBSteps_SPAI", 661 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 663 "PCSPAISetMax_SPAI", 664 PCSPAISetMax_SPAI);CHKERRQ(ierr); 665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 666 "PCSPAISetMaxNew_SPAI", 667 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 669 "PCSPAISetBlockSize_SPAI", 670 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 672 "PCSPAISetCacheSize_SPAI", 673 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 674 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 675 "PCSPAISetVerbose_SPAI", 676 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 677 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 678 "PCSPAISetSp_SPAI", 679 PCSPAISetSp_SPAI);CHKERRQ(ierr); 680 681 PetscFunctionReturn(0); 682 } 683 EXTERN_C_END 684 685 /**********************************************************************/ 686 687 /* 688 Converts from a PETSc matrix to an SPAI matrix 689 */ 690 #undef __FUNCT__ 691 #define __FUNCT__ "ConvertMatToMatrix" 692 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 693 { 694 matrix *M; 695 int i,j,col; 696 int row_indx; 697 int len,pe,local_indx,start_indx; 698 int *mapping; 699 PetscErrorCode ierr; 700 const int *cols; 701 const double *vals; 702 int *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend; 703 struct compressed_lines *rows; 704 705 PetscFunctionBegin; 706 707 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 708 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 709 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 710 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 711 712 /* 713 not sure why a barrier is required. commenting out 714 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 715 */ 716 717 M = new_matrix((void*)comm); 718 719 M->n = n; 720 M->bs = 1; 721 M->max_block_size = 1; 722 723 M->mnls = (int*)malloc(sizeof(int)*size); 724 M->start_indices = (int*)malloc(sizeof(int)*size); 725 M->pe = (int*)malloc(sizeof(int)*n); 726 M->block_sizes = (int*)malloc(sizeof(int)*n); 727 for (i=0; i<n; i++) M->block_sizes[i] = 1; 728 729 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 730 731 M->start_indices[0] = 0; 732 for (i=1; i<size; i++) { 733 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 734 } 735 736 M->mnl = M->mnls[M->myid]; 737 M->my_start_index = M->start_indices[M->myid]; 738 739 for (i=0; i<size; i++) { 740 start_indx = M->start_indices[i]; 741 for (j=0; j<M->mnls[i]; j++) 742 M->pe[start_indx+j] = i; 743 } 744 745 if (AT) { 746 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 747 } else { 748 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 749 } 750 751 rows = M->lines; 752 753 /* Determine the mapping from global indices to pointers */ 754 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 755 pe = 0; 756 local_indx = 0; 757 for (i=0; i<M->n; i++) { 758 if (local_indx >= M->mnls[pe]) { 759 pe++; 760 local_indx = 0; 761 } 762 mapping[i] = local_indx + M->start_indices[pe]; 763 local_indx++; 764 } 765 766 767 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 768 769 /*********************************************************/ 770 /************** Set up the row structure *****************/ 771 /*********************************************************/ 772 773 /* count number of nonzeros in every row */ 774 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 775 for (i=rstart; i<rend; i++) { 776 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 777 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 778 } 779 780 /* allocate buffers */ 781 len = 0; 782 for (i=0; i<mnl; i++) { 783 if (len < num_ptr[i]) len = num_ptr[i]; 784 } 785 786 for (i=rstart; i<rend; i++) { 787 row_indx = i-rstart; 788 len = num_ptr[row_indx]; 789 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 790 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 791 } 792 793 /* copy the matrix */ 794 for (i=rstart; i<rend; i++) { 795 row_indx = i - rstart; 796 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 797 for (j=0; j<nz; j++) { 798 col = cols[j]; 799 len = rows->len[row_indx]++; 800 rows->ptrs[row_indx][len] = mapping[col]; 801 rows->A[row_indx][len] = vals[j]; 802 } 803 rows->slen[row_indx] = rows->len[row_indx]; 804 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 805 } 806 807 808 /************************************************************/ 809 /************** Set up the column structure *****************/ 810 /*********************************************************/ 811 812 if (AT) { 813 814 /* count number of nonzeros in every column */ 815 for (i=rstart; i<rend; i++) { 816 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 817 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 818 } 819 820 /* allocate buffers */ 821 len = 0; 822 for (i=0; i<mnl; i++) { 823 if (len < num_ptr[i]) len = num_ptr[i]; 824 } 825 826 for (i=rstart; i<rend; i++) { 827 row_indx = i-rstart; 828 len = num_ptr[row_indx]; 829 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 830 } 831 832 /* copy the matrix (i.e., the structure) */ 833 for (i=rstart; i<rend; i++) { 834 row_indx = i - rstart; 835 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 836 for (j=0; j<nz; j++) { 837 col = cols[j]; 838 len = rows->rlen[row_indx]++; 839 rows->rptrs[row_indx][len] = mapping[col]; 840 } 841 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 842 } 843 } 844 845 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 846 ierr = PetscFree(mapping);CHKERRQ(ierr); 847 848 order_pointers(M); 849 M->maxnz = calc_maxnz(M); 850 851 *B = M; 852 853 PetscFunctionReturn(0); 854 } 855 856 /**********************************************************************/ 857 858 /* 859 Converts from an SPAI matrix B to a PETSc matrix PB. 860 This assumes that the the SPAI matrix B is stored in 861 COMPRESSED-ROW format. 862 */ 863 #undef __FUNCT__ 864 #define __FUNCT__ "ConvertMatrixToMat" 865 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 866 { 867 int size,rank; 868 PetscErrorCode ierr; 869 int m,n,M,N; 870 int d_nz,o_nz; 871 int *d_nnz,*o_nnz; 872 int i,k,global_row,global_col,first_diag_col,last_diag_col; 873 PetscScalar val; 874 875 PetscFunctionBegin; 876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 878 879 m = n = B->mnls[rank]; 880 d_nz = o_nz = 0; 881 882 /* Determine preallocation for MatCreateMPIAIJ */ 883 ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr); 884 ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr); 885 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 886 first_diag_col = B->start_indices[rank]; 887 last_diag_col = first_diag_col + B->mnls[rank]; 888 for (i=0; i<B->mnls[rank]; i++) { 889 for (k=0; k<B->lines->len[i]; k++) { 890 global_col = B->lines->ptrs[i][k]; 891 if ((global_col >= first_diag_col) && (global_col <= last_diag_col)) 892 d_nnz[i]++; 893 else 894 o_nnz[i]++; 895 } 896 } 897 898 M = N = B->n; 899 /* Here we only know how to create AIJ format */ 900 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 901 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 902 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 903 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 904 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 905 906 for (i=0; i<B->mnls[rank]; i++) { 907 global_row = B->start_indices[rank]+i; 908 for (k=0; k<B->lines->len[i]; k++) { 909 global_col = B->lines->ptrs[i][k]; 910 val = B->lines->A[i][k]; 911 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 912 } 913 } 914 915 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 916 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 917 918 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920 921 PetscFunctionReturn(0); 922 } 923 924 /**********************************************************************/ 925 926 /* 927 Converts from an SPAI vector v to a PETSc vec Pv. 928 */ 929 #undef __FUNCT__ 930 #define __FUNCT__ "ConvertVectorToVec" 931 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 932 { 933 PetscErrorCode ierr; 934 int size,rank,m,M,i,*mnls,*start_indices,*global_indices; 935 936 PetscFunctionBegin; 937 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 938 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 939 940 m = v->mnl; 941 M = v->n; 942 943 944 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 945 946 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 947 ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr); 948 949 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 950 start_indices[0] = 0; 951 for (i=1; i<size; i++) 952 start_indices[i] = start_indices[i-1] +mnls[i-1]; 953 954 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 955 for (i=0; i<v->mnl; i++) 956 global_indices[i] = start_indices[rank] + i; 957 958 ierr = PetscFree(mnls);CHKERRQ(ierr); 959 ierr = PetscFree(start_indices);CHKERRQ(ierr); 960 961 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 962 963 ierr = PetscFree(global_indices);CHKERRQ(ierr); 964 965 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 966 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 967 968 PetscFunctionReturn(0); 969 } 970 971 972 973 974