1 #define PETSCKSP_DLL 2 3 /* 4 3/99 Modified by Stephen Barnard to support SPAI version 3.0 5 */ 6 7 /* 8 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 9 Code written by Stephen Barnard. 10 11 Note: there is some BAD memory bleeding below! 12 13 This code needs work 14 15 1) get rid of all memory bleeding 16 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 17 rather than having the sp flag for PC_SPAI 18 3) fix to set the block size based on the matrix block size 19 20 */ 21 22 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 23 #include "petscspai.h" 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include "spai.h" 31 #include "matrix.h" 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 37 extern PetscErrorCode MM_to_PETSC(char *,char *,char *); 38 39 typedef struct { 40 41 matrix *B; /* matrix in SPAI format */ 42 matrix *BT; /* transpose of matrix in SPAI format */ 43 matrix *M; /* the approximate inverse in SPAI format */ 44 45 Mat PM; /* the approximate inverse PETSc format */ 46 47 double epsilon; /* tolerance */ 48 int nbsteps; /* max number of "improvement" steps per line */ 49 int max; /* max dimensions of is_I, q, etc. */ 50 int maxnew; /* max number of new entries per step */ 51 int block_size; /* constant block size */ 52 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 53 int verbose; /* SPAI prints timing and statistics */ 54 55 int sp; /* symmetric nonzero pattern */ 56 MPI_Comm comm_spai; /* communicator to be used with spai */ 57 } PC_SPAI; 58 59 /**********************************************************************/ 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCSetUp_SPAI" 63 static PetscErrorCode PCSetUp_SPAI(PC pc) 64 { 65 PC_SPAI *ispai = (PC_SPAI*)pc->data; 66 PetscErrorCode ierr; 67 Mat AT; 68 69 PetscFunctionBegin; 70 71 init_SPAI(); 72 73 if (ispai->sp) { 74 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 75 } else { 76 /* Use the transpose to get the column nonzero structure. */ 77 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 78 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 79 ierr = MatDestroy(AT);CHKERRQ(ierr); 80 } 81 82 /* Destroy the transpose */ 83 /* Don't know how to do it. PETSc developers? */ 84 85 /* construct SPAI preconditioner */ 86 /* FILE *messages */ /* file for warning messages */ 87 /* double epsilon */ /* tolerance */ 88 /* int nbsteps */ /* max number of "improvement" steps per line */ 89 /* int max */ /* max dimensions of is_I, q, etc. */ 90 /* int maxnew */ /* max number of new entries per step */ 91 /* int block_size */ /* block_size == 1 specifies scalar elments 92 block_size == n specifies nxn constant-block elements 93 block_size == 0 specifies variable-block elements */ 94 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 95 /* cache_size == 0 indicates no caching */ 96 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 97 verbose == 1 prints timing and matrix statistics */ 98 99 ierr = bspai(ispai->B,&ispai->M, 100 stdout, 101 ispai->epsilon, 102 ispai->nbsteps, 103 ispai->max, 104 ispai->maxnew, 105 ispai->block_size, 106 ispai->cache_size, 107 ispai->verbose);CHKERRQ(ierr); 108 109 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 110 111 /* free the SPAI matrices */ 112 sp_free_matrix(ispai->B); 113 sp_free_matrix(ispai->M); 114 115 PetscFunctionReturn(0); 116 } 117 118 /**********************************************************************/ 119 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCApply_SPAI" 122 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 123 { 124 PC_SPAI *ispai = (PC_SPAI*)pc->data; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 /* Now using PETSc's multiply */ 129 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /**********************************************************************/ 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "PCDestroy_SPAI" 137 static PetscErrorCode PCDestroy_SPAI(PC pc) 138 { 139 PetscErrorCode ierr; 140 PC_SPAI *ispai = (PC_SPAI*)pc->data; 141 142 PetscFunctionBegin; 143 if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);} 144 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 145 ierr = PetscFree(ispai);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 /**********************************************************************/ 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "PCView_SPAI" 153 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 154 { 155 PC_SPAI *ispai = (PC_SPAI*)pc->data; 156 PetscErrorCode ierr; 157 PetscBool iascii; 158 159 PetscFunctionBegin; 160 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 161 if (iascii) { 162 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 169 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 170 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 171 } 172 PetscFunctionReturn(0); 173 } 174 175 EXTERN_C_BEGIN 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 178 PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 179 { 180 PC_SPAI *ispai = (PC_SPAI*)pc->data; 181 PetscFunctionBegin; 182 ispai->epsilon = epsilon1; 183 PetscFunctionReturn(0); 184 } 185 EXTERN_C_END 186 187 /**********************************************************************/ 188 189 EXTERN_C_BEGIN 190 #undef __FUNCT__ 191 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 192 PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 193 { 194 PC_SPAI *ispai = (PC_SPAI*)pc->data; 195 PetscFunctionBegin; 196 ispai->nbsteps = nbsteps1; 197 PetscFunctionReturn(0); 198 } 199 EXTERN_C_END 200 201 /**********************************************************************/ 202 203 /* added 1/7/99 g.h. */ 204 EXTERN_C_BEGIN 205 #undef __FUNCT__ 206 #define __FUNCT__ "PCSPAISetMax_SPAI" 207 PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 208 { 209 PC_SPAI *ispai = (PC_SPAI*)pc->data; 210 PetscFunctionBegin; 211 ispai->max = max1; 212 PetscFunctionReturn(0); 213 } 214 EXTERN_C_END 215 216 /**********************************************************************/ 217 218 EXTERN_C_BEGIN 219 #undef __FUNCT__ 220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 221 PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 222 { 223 PC_SPAI *ispai = (PC_SPAI*)pc->data; 224 PetscFunctionBegin; 225 ispai->maxnew = maxnew1; 226 PetscFunctionReturn(0); 227 } 228 EXTERN_C_END 229 230 /**********************************************************************/ 231 232 EXTERN_C_BEGIN 233 #undef __FUNCT__ 234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 235 PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 236 { 237 PC_SPAI *ispai = (PC_SPAI*)pc->data; 238 PetscFunctionBegin; 239 ispai->block_size = block_size1; 240 PetscFunctionReturn(0); 241 } 242 EXTERN_C_END 243 244 /**********************************************************************/ 245 246 EXTERN_C_BEGIN 247 #undef __FUNCT__ 248 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 249 PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 250 { 251 PC_SPAI *ispai = (PC_SPAI*)pc->data; 252 PetscFunctionBegin; 253 ispai->cache_size = cache_size; 254 PetscFunctionReturn(0); 255 } 256 EXTERN_C_END 257 258 /**********************************************************************/ 259 260 EXTERN_C_BEGIN 261 #undef __FUNCT__ 262 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 263 PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 264 { 265 PC_SPAI *ispai = (PC_SPAI*)pc->data; 266 PetscFunctionBegin; 267 ispai->verbose = verbose; 268 PetscFunctionReturn(0); 269 } 270 EXTERN_C_END 271 272 /**********************************************************************/ 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "PCSPAISetSp_SPAI" 277 PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 278 { 279 PC_SPAI *ispai = (PC_SPAI*)pc->data; 280 PetscFunctionBegin; 281 ispai->sp = sp; 282 PetscFunctionReturn(0); 283 } 284 EXTERN_C_END 285 286 /* -------------------------------------------------------------------*/ 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "PCSPAISetEpsilon" 290 /*@ 291 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 292 293 Input Parameters: 294 + pc - the preconditioner 295 - eps - epsilon (default .4) 296 297 Notes: Espilon must be between 0 and 1. It controls the 298 quality of the approximation of M to the inverse of 299 A. Higher values of epsilon lead to more work, more 300 fill, and usually better preconditioners. In many 301 cases the best choice of epsilon is the one that 302 divides the total solution time equally between the 303 preconditioner and the solver. 304 305 Level: intermediate 306 307 .seealso: PCSPAI, PCSetType() 308 @*/ 309 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 310 { 311 PetscErrorCode ierr; 312 PetscFunctionBegin; 313 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 /**********************************************************************/ 318 319 #undef __FUNCT__ 320 #define __FUNCT__ "PCSPAISetNBSteps" 321 /*@ 322 PCSPAISetNBSteps - set maximum number of improvement steps per row in 323 the SPAI preconditioner 324 325 Input Parameters: 326 + pc - the preconditioner 327 - n - number of steps (default 5) 328 329 Notes: SPAI constructs to approximation to every column of 330 the exact inverse of A in a series of improvement 331 steps. The quality of the approximation is determined 332 by epsilon. If an approximation achieving an accuracy 333 of epsilon is not obtained after ns steps, SPAI simply 334 uses the best approximation constructed so far. 335 336 Level: intermediate 337 338 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 339 @*/ 340 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 341 { 342 PetscErrorCode ierr; 343 PetscFunctionBegin; 344 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 /**********************************************************************/ 349 350 /* added 1/7/99 g.h. */ 351 #undef __FUNCT__ 352 #define __FUNCT__ "PCSPAISetMax" 353 /*@ 354 PCSPAISetMax - set the size of various working buffers in 355 the SPAI preconditioner 356 357 Input Parameters: 358 + pc - the preconditioner 359 - n - size (default is 5000) 360 361 Level: intermediate 362 363 .seealso: PCSPAI, PCSetType() 364 @*/ 365 PetscErrorCode PCSPAISetMax(PC pc,int max1) 366 { 367 PetscErrorCode ierr; 368 PetscFunctionBegin; 369 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 370 PetscFunctionReturn(0); 371 } 372 373 /**********************************************************************/ 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCSPAISetMaxNew" 377 /*@ 378 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 379 in SPAI preconditioner 380 381 Input Parameters: 382 + pc - the preconditioner 383 - n - maximum number (default 5) 384 385 Level: intermediate 386 387 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 388 @*/ 389 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 390 { 391 PetscErrorCode ierr; 392 PetscFunctionBegin; 393 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /**********************************************************************/ 398 399 #undef __FUNCT__ 400 #define __FUNCT__ "PCSPAISetBlockSize" 401 /*@ 402 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 403 404 Input Parameters: 405 + pc - the preconditioner 406 - n - block size (default 1) 407 408 Notes: A block 409 size of 1 treats A as a matrix of scalar elements. A 410 block size of s > 1 treats A as a matrix of sxs 411 blocks. A block size of 0 treats A as a matrix with 412 variable sized blocks, which are determined by 413 searching for dense square diagonal blocks in A. 414 This can be very effective for finite-element 415 matrices. 416 417 SPAI will convert A to block form, use a block 418 version of the preconditioner algorithm, and then 419 convert the result back to scalar form. 420 421 In many cases the a block-size parameter other than 1 422 can lead to very significant improvement in 423 performance. 424 425 426 Level: intermediate 427 428 .seealso: PCSPAI, PCSetType() 429 @*/ 430 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 431 { 432 PetscErrorCode ierr; 433 PetscFunctionBegin; 434 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 435 PetscFunctionReturn(0); 436 } 437 438 /**********************************************************************/ 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "PCSPAISetCacheSize" 442 /*@ 443 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 444 445 Input Parameters: 446 + pc - the preconditioner 447 - n - cache size {0,1,2,3,4,5} (default 5) 448 449 Notes: SPAI uses a hash table to cache messages and avoid 450 redundant communication. If suggest always using 451 5. This parameter is irrelevant in the serial 452 version. 453 454 Level: intermediate 455 456 .seealso: PCSPAI, PCSetType() 457 @*/ 458 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 459 { 460 PetscErrorCode ierr; 461 PetscFunctionBegin; 462 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 /**********************************************************************/ 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "PCSPAISetVerbose" 470 /*@ 471 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 472 473 Input Parameters: 474 + pc - the preconditioner 475 - n - level (default 1) 476 477 Notes: print parameters, timings and matrix statistics 478 479 Level: intermediate 480 481 .seealso: PCSPAI, PCSetType() 482 @*/ 483 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 484 { 485 PetscErrorCode ierr; 486 PetscFunctionBegin; 487 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 /**********************************************************************/ 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "PCSPAISetSp" 495 /*@ 496 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 497 498 Input Parameters: 499 + pc - the preconditioner 500 - n - 0 or 1 501 502 Notes: If A has a symmetric nonzero pattern use -sp 1 to 503 improve performance by eliminating some communication 504 in the parallel version. Even if A does not have a 505 symmetric nonzero pattern -sp 1 may well lead to good 506 results, but the code will not follow the published 507 SPAI algorithm exactly. 508 509 510 Level: intermediate 511 512 .seealso: PCSPAI, PCSetType() 513 @*/ 514 PetscErrorCode PCSPAISetSp(PC pc,int sp) 515 { 516 PetscErrorCode ierr; 517 PetscFunctionBegin; 518 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 /**********************************************************************/ 523 524 /**********************************************************************/ 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCSetFromOptions_SPAI" 528 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 529 { 530 PC_SPAI *ispai = (PC_SPAI*)pc->data; 531 PetscErrorCode ierr; 532 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 533 double epsilon1; 534 PetscBool flg; 535 536 PetscFunctionBegin; 537 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 538 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 539 if (flg) { 540 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 541 } 542 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 543 if (flg) { 544 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 545 } 546 /* added 1/7/99 g.h. */ 547 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 548 if (flg) { 549 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 550 } 551 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 552 if (flg) { 553 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 554 } 555 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 556 if (flg) { 557 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 558 } 559 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 560 if (flg) { 561 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 562 } 563 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 564 if (flg) { 565 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 566 } 567 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 568 if (flg) { 569 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 570 } 571 ierr = PetscOptionsTail();CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 /**********************************************************************/ 576 577 /*MC 578 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 579 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 580 581 Options Database Keys: 582 + -pc_spai_epsilon <eps> - set tolerance 583 . -pc_spai_nbstep <n> - set nbsteps 584 . -pc_spai_max <m> - set max 585 . -pc_spai_max_new <m> - set maxnew 586 . -pc_spai_block_size <n> - set block size 587 . -pc_spai_cache_size <n> - set cache size 588 . -pc_spai_sp <m> - set sp 589 - -pc_spai_set_verbose <true,false> - verbose output 590 591 Notes: This only works with AIJ matrices. 592 593 Level: beginner 594 595 Concepts: approximate inverse 596 597 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 598 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 599 PCSPAISetVerbose(), PCSPAISetSp() 600 M*/ 601 602 EXTERN_C_BEGIN 603 #undef __FUNCT__ 604 #define __FUNCT__ "PCCreate_SPAI" 605 PetscErrorCode PCCreate_SPAI(PC pc) 606 { 607 PC_SPAI *ispai; 608 PetscErrorCode ierr; 609 610 PetscFunctionBegin; 611 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 612 pc->data = ispai; 613 614 pc->ops->destroy = PCDestroy_SPAI; 615 pc->ops->apply = PCApply_SPAI; 616 pc->ops->applyrichardson = 0; 617 pc->ops->setup = PCSetUp_SPAI; 618 pc->ops->view = PCView_SPAI; 619 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 620 621 ispai->epsilon = .4; 622 ispai->nbsteps = 5; 623 ispai->max = 5000; 624 ispai->maxnew = 5; 625 ispai->block_size = 1; 626 ispai->cache_size = 5; 627 ispai->verbose = 0; 628 629 ispai->sp = 1; 630 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 631 632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 633 "PCSPAISetEpsilon_SPAI", 634 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 636 "PCSPAISetNBSteps_SPAI", 637 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 639 "PCSPAISetMax_SPAI", 640 PCSPAISetMax_SPAI);CHKERRQ(ierr); 641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 642 "PCSPAISetMaxNew_SPAI", 643 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 645 "PCSPAISetBlockSize_SPAI", 646 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 647 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 648 "PCSPAISetCacheSize_SPAI", 649 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 650 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 651 "PCSPAISetVerbose_SPAI", 652 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 653 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 654 "PCSPAISetSp_SPAI", 655 PCSPAISetSp_SPAI);CHKERRQ(ierr); 656 657 PetscFunctionReturn(0); 658 } 659 EXTERN_C_END 660 661 /**********************************************************************/ 662 663 /* 664 Converts from a PETSc matrix to an SPAI matrix 665 */ 666 #undef __FUNCT__ 667 #define __FUNCT__ "ConvertMatToMatrix" 668 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 669 { 670 matrix *M; 671 int i,j,col; 672 int row_indx; 673 int len,pe,local_indx,start_indx; 674 int *mapping; 675 PetscErrorCode ierr; 676 const int *cols; 677 const double *vals; 678 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 679 PetscMPIInt size,rank; 680 struct compressed_lines *rows; 681 682 PetscFunctionBegin; 683 684 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 685 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 686 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 687 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 688 689 /* 690 not sure why a barrier is required. commenting out 691 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 692 */ 693 694 M = new_matrix((SPAI_Comm)comm); 695 696 M->n = n; 697 M->bs = 1; 698 M->max_block_size = 1; 699 700 M->mnls = (int*)malloc(sizeof(int)*size); 701 M->start_indices = (int*)malloc(sizeof(int)*size); 702 M->pe = (int*)malloc(sizeof(int)*n); 703 M->block_sizes = (int*)malloc(sizeof(int)*n); 704 for (i=0; i<n; i++) M->block_sizes[i] = 1; 705 706 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 707 708 M->start_indices[0] = 0; 709 for (i=1; i<size; i++) { 710 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 711 } 712 713 M->mnl = M->mnls[M->myid]; 714 M->my_start_index = M->start_indices[M->myid]; 715 716 for (i=0; i<size; i++) { 717 start_indx = M->start_indices[i]; 718 for (j=0; j<M->mnls[i]; j++) 719 M->pe[start_indx+j] = i; 720 } 721 722 if (AT) { 723 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 724 } else { 725 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 726 } 727 728 rows = M->lines; 729 730 /* Determine the mapping from global indices to pointers */ 731 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 732 pe = 0; 733 local_indx = 0; 734 for (i=0; i<M->n; i++) { 735 if (local_indx >= M->mnls[pe]) { 736 pe++; 737 local_indx = 0; 738 } 739 mapping[i] = local_indx + M->start_indices[pe]; 740 local_indx++; 741 } 742 743 744 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 745 746 /*********************************************************/ 747 /************** Set up the row structure *****************/ 748 /*********************************************************/ 749 750 /* count number of nonzeros in every row */ 751 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 752 for (i=rstart; i<rend; i++) { 753 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 754 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 755 } 756 757 /* allocate buffers */ 758 len = 0; 759 for (i=0; i<mnl; i++) { 760 if (len < num_ptr[i]) len = num_ptr[i]; 761 } 762 763 for (i=rstart; i<rend; i++) { 764 row_indx = i-rstart; 765 len = num_ptr[row_indx]; 766 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 767 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 768 } 769 770 /* copy the matrix */ 771 for (i=rstart; i<rend; i++) { 772 row_indx = i - rstart; 773 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 774 for (j=0; j<nz; j++) { 775 col = cols[j]; 776 len = rows->len[row_indx]++; 777 rows->ptrs[row_indx][len] = mapping[col]; 778 rows->A[row_indx][len] = vals[j]; 779 } 780 rows->slen[row_indx] = rows->len[row_indx]; 781 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 782 } 783 784 785 /************************************************************/ 786 /************** Set up the column structure *****************/ 787 /*********************************************************/ 788 789 if (AT) { 790 791 /* count number of nonzeros in every column */ 792 for (i=rstart; i<rend; i++) { 793 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 794 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 795 } 796 797 /* allocate buffers */ 798 len = 0; 799 for (i=0; i<mnl; i++) { 800 if (len < num_ptr[i]) len = num_ptr[i]; 801 } 802 803 for (i=rstart; i<rend; i++) { 804 row_indx = i-rstart; 805 len = num_ptr[row_indx]; 806 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 807 } 808 809 /* copy the matrix (i.e., the structure) */ 810 for (i=rstart; i<rend; i++) { 811 row_indx = i - rstart; 812 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 813 for (j=0; j<nz; j++) { 814 col = cols[j]; 815 len = rows->rlen[row_indx]++; 816 rows->rptrs[row_indx][len] = mapping[col]; 817 } 818 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 819 } 820 } 821 822 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 823 ierr = PetscFree(mapping);CHKERRQ(ierr); 824 825 order_pointers(M); 826 M->maxnz = calc_maxnz(M); 827 828 *B = M; 829 830 PetscFunctionReturn(0); 831 } 832 833 /**********************************************************************/ 834 835 /* 836 Converts from an SPAI matrix B to a PETSc matrix PB. 837 This assumes that the the SPAI matrix B is stored in 838 COMPRESSED-ROW format. 839 */ 840 #undef __FUNCT__ 841 #define __FUNCT__ "ConvertMatrixToMat" 842 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 843 { 844 PetscMPIInt size,rank; 845 PetscErrorCode ierr; 846 int m,n,M,N; 847 int d_nz,o_nz; 848 int *d_nnz,*o_nnz; 849 int i,k,global_row,global_col,first_diag_col,last_diag_col; 850 PetscScalar val; 851 852 PetscFunctionBegin; 853 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 854 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 855 856 m = n = B->mnls[rank]; 857 d_nz = o_nz = 0; 858 859 /* Determine preallocation for MatCreateMPIAIJ */ 860 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 861 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 862 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 863 first_diag_col = B->start_indices[rank]; 864 last_diag_col = first_diag_col + B->mnls[rank]; 865 for (i=0; i<B->mnls[rank]; i++) { 866 for (k=0; k<B->lines->len[i]; k++) { 867 global_col = B->lines->ptrs[i][k]; 868 if ((global_col >= first_diag_col) && (global_col <= last_diag_col)) 869 d_nnz[i]++; 870 else 871 o_nnz[i]++; 872 } 873 } 874 875 M = N = B->n; 876 /* Here we only know how to create AIJ format */ 877 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 878 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 879 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 880 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 881 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 882 883 for (i=0; i<B->mnls[rank]; i++) { 884 global_row = B->start_indices[rank]+i; 885 for (k=0; k<B->lines->len[i]; k++) { 886 global_col = B->lines->ptrs[i][k]; 887 val = B->lines->A[i][k]; 888 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 889 } 890 } 891 892 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 893 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 894 895 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 898 PetscFunctionReturn(0); 899 } 900 901 /**********************************************************************/ 902 903 /* 904 Converts from an SPAI vector v to a PETSc vec Pv. 905 */ 906 #undef __FUNCT__ 907 #define __FUNCT__ "ConvertVectorToVec" 908 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 909 { 910 PetscErrorCode ierr; 911 PetscMPIInt size,rank; 912 int m,M,i,*mnls,*start_indices,*global_indices; 913 914 PetscFunctionBegin; 915 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 916 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 917 918 m = v->mnl; 919 M = v->n; 920 921 922 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 923 924 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 925 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 926 927 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 928 start_indices[0] = 0; 929 for (i=1; i<size; i++) 930 start_indices[i] = start_indices[i-1] +mnls[i-1]; 931 932 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 933 for (i=0; i<v->mnl; i++) 934 global_indices[i] = start_indices[rank] + i; 935 936 ierr = PetscFree(mnls);CHKERRQ(ierr); 937 ierr = PetscFree(start_indices);CHKERRQ(ierr); 938 939 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 940 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 941 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 942 943 ierr = PetscFree(global_indices);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 948 949 950