1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 23 #include <../src/ksp/pc/impls/spai/petscspai.h> 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include <spai.h> 31 #include <matrix.h> 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 38 39 typedef struct { 40 41 matrix *B; /* matrix in SPAI format */ 42 matrix *BT; /* transpose of matrix in SPAI format */ 43 matrix *M; /* the approximate inverse in SPAI format */ 44 45 Mat PM; /* the approximate inverse PETSc format */ 46 47 double epsilon; /* tolerance */ 48 int nbsteps; /* max number of "improvement" steps per line */ 49 int max; /* max dimensions of is_I, q, etc. */ 50 int maxnew; /* max number of new entries per step */ 51 int block_size; /* constant block size */ 52 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 53 int verbose; /* SPAI prints timing and statistics */ 54 55 int sp; /* symmetric nonzero pattern */ 56 MPI_Comm comm_spai; /* communicator to be used with spai */ 57 } PC_SPAI; 58 59 /**********************************************************************/ 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "PCSetUp_SPAI" 63 static PetscErrorCode PCSetUp_SPAI(PC pc) 64 { 65 PC_SPAI *ispai = (PC_SPAI*)pc->data; 66 PetscErrorCode ierr; 67 Mat AT; 68 69 PetscFunctionBegin; 70 init_SPAI(); 71 72 if (ispai->sp) { 73 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 74 } else { 75 /* Use the transpose to get the column nonzero structure. */ 76 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 77 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 78 ierr = MatDestroy(&AT);CHKERRQ(ierr); 79 } 80 81 /* Destroy the transpose */ 82 /* Don't know how to do it. PETSc developers? */ 83 84 /* construct SPAI preconditioner */ 85 /* FILE *messages */ /* file for warning messages */ 86 /* double epsilon */ /* tolerance */ 87 /* int nbsteps */ /* max number of "improvement" steps per line */ 88 /* int max */ /* max dimensions of is_I, q, etc. */ 89 /* int maxnew */ /* max number of new entries per step */ 90 /* int block_size */ /* block_size == 1 specifies scalar elments 91 block_size == n specifies nxn constant-block elements 92 block_size == 0 specifies variable-block elements */ 93 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 94 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 95 verbose == 1 prints timing and matrix statistics */ 96 97 ierr = bspai(ispai->B,&ispai->M, 98 stdout, 99 ispai->epsilon, 100 ispai->nbsteps, 101 ispai->max, 102 ispai->maxnew, 103 ispai->block_size, 104 ispai->cache_size, 105 ispai->verbose);CHKERRQ(ierr); 106 107 ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr); 108 109 /* free the SPAI matrices */ 110 sp_free_matrix(ispai->B); 111 sp_free_matrix(ispai->M); 112 PetscFunctionReturn(0); 113 } 114 115 /**********************************************************************/ 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCApply_SPAI" 119 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 120 { 121 PC_SPAI *ispai = (PC_SPAI*)pc->data; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 /* Now using PETSc's multiply */ 126 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 127 PetscFunctionReturn(0); 128 } 129 130 /**********************************************************************/ 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "PCDestroy_SPAI" 134 static PetscErrorCode PCDestroy_SPAI(PC pc) 135 { 136 PetscErrorCode ierr; 137 PC_SPAI *ispai = (PC_SPAI*)pc->data; 138 139 PetscFunctionBegin; 140 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 141 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 142 ierr = PetscFree(pc->data);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 /**********************************************************************/ 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "PCView_SPAI" 150 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 151 { 152 PC_SPAI *ispai = (PC_SPAI*)pc->data; 153 PetscErrorCode ierr; 154 PetscBool iascii; 155 156 PetscFunctionBegin; 157 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 158 if (iascii) { 159 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 160 ierr = PetscViewerASCIIPrintf(viewer," epsilon %g\n", (double)ispai->epsilon);CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 174 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 175 { 176 PC_SPAI *ispai = (PC_SPAI*)pc->data; 177 178 PetscFunctionBegin; 179 ispai->epsilon = epsilon1; 180 PetscFunctionReturn(0); 181 } 182 183 /**********************************************************************/ 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 187 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 188 { 189 PC_SPAI *ispai = (PC_SPAI*)pc->data; 190 191 PetscFunctionBegin; 192 ispai->nbsteps = nbsteps1; 193 PetscFunctionReturn(0); 194 } 195 196 /**********************************************************************/ 197 198 /* added 1/7/99 g.h. */ 199 #undef __FUNCT__ 200 #define __FUNCT__ "PCSPAISetMax_SPAI" 201 static PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 202 { 203 PC_SPAI *ispai = (PC_SPAI*)pc->data; 204 205 PetscFunctionBegin; 206 ispai->max = max1; 207 PetscFunctionReturn(0); 208 } 209 210 /**********************************************************************/ 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 214 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 215 { 216 PC_SPAI *ispai = (PC_SPAI*)pc->data; 217 218 PetscFunctionBegin; 219 ispai->maxnew = maxnew1; 220 PetscFunctionReturn(0); 221 } 222 223 /**********************************************************************/ 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 227 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 228 { 229 PC_SPAI *ispai = (PC_SPAI*)pc->data; 230 231 PetscFunctionBegin; 232 ispai->block_size = block_size1; 233 PetscFunctionReturn(0); 234 } 235 236 /**********************************************************************/ 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 240 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 241 { 242 PC_SPAI *ispai = (PC_SPAI*)pc->data; 243 244 PetscFunctionBegin; 245 ispai->cache_size = cache_size; 246 PetscFunctionReturn(0); 247 } 248 249 /**********************************************************************/ 250 251 #undef __FUNCT__ 252 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 253 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 254 { 255 PC_SPAI *ispai = (PC_SPAI*)pc->data; 256 257 PetscFunctionBegin; 258 ispai->verbose = verbose; 259 PetscFunctionReturn(0); 260 } 261 262 /**********************************************************************/ 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "PCSPAISetSp_SPAI" 266 static PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 267 { 268 PC_SPAI *ispai = (PC_SPAI*)pc->data; 269 270 PetscFunctionBegin; 271 ispai->sp = sp; 272 PetscFunctionReturn(0); 273 } 274 275 /* -------------------------------------------------------------------*/ 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "PCSPAISetEpsilon" 279 /*@ 280 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 281 282 Input Parameters: 283 + pc - the preconditioner 284 - eps - epsilon (default .4) 285 286 Notes: Espilon must be between 0 and 1. It controls the 287 quality of the approximation of M to the inverse of 288 A. Higher values of epsilon lead to more work, more 289 fill, and usually better preconditioners. In many 290 cases the best choice of epsilon is the one that 291 divides the total solution time equally between the 292 preconditioner and the solver. 293 294 Level: intermediate 295 296 .seealso: PCSPAI, PCSetType() 297 @*/ 298 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 /**********************************************************************/ 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "PCSPAISetNBSteps" 311 /*@ 312 PCSPAISetNBSteps - set maximum number of improvement steps per row in 313 the SPAI preconditioner 314 315 Input Parameters: 316 + pc - the preconditioner 317 - n - number of steps (default 5) 318 319 Notes: SPAI constructs to approximation to every column of 320 the exact inverse of A in a series of improvement 321 steps. The quality of the approximation is determined 322 by epsilon. If an approximation achieving an accuracy 323 of epsilon is not obtained after ns steps, SPAI simply 324 uses the best approximation constructed so far. 325 326 Level: intermediate 327 328 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 329 @*/ 330 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 331 { 332 PetscErrorCode ierr; 333 334 PetscFunctionBegin; 335 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 /**********************************************************************/ 340 341 /* added 1/7/99 g.h. */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "PCSPAISetMax" 344 /*@ 345 PCSPAISetMax - set the size of various working buffers in 346 the SPAI preconditioner 347 348 Input Parameters: 349 + pc - the preconditioner 350 - n - size (default is 5000) 351 352 Level: intermediate 353 354 .seealso: PCSPAI, PCSetType() 355 @*/ 356 PetscErrorCode PCSPAISetMax(PC pc,int max1) 357 { 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 365 /**********************************************************************/ 366 367 #undef __FUNCT__ 368 #define __FUNCT__ "PCSPAISetMaxNew" 369 /*@ 370 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 371 in SPAI preconditioner 372 373 Input Parameters: 374 + pc - the preconditioner 375 - n - maximum number (default 5) 376 377 Level: intermediate 378 379 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 380 @*/ 381 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 382 { 383 PetscErrorCode ierr; 384 385 PetscFunctionBegin; 386 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 /**********************************************************************/ 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "PCSPAISetBlockSize" 394 /*@ 395 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 396 397 Input Parameters: 398 + pc - the preconditioner 399 - n - block size (default 1) 400 401 Notes: A block 402 size of 1 treats A as a matrix of scalar elements. A 403 block size of s > 1 treats A as a matrix of sxs 404 blocks. A block size of 0 treats A as a matrix with 405 variable sized blocks, which are determined by 406 searching for dense square diagonal blocks in A. 407 This can be very effective for finite-element 408 matrices. 409 410 SPAI will convert A to block form, use a block 411 version of the preconditioner algorithm, and then 412 convert the result back to scalar form. 413 414 In many cases the a block-size parameter other than 1 415 can lead to very significant improvement in 416 performance. 417 418 419 Level: intermediate 420 421 .seealso: PCSPAI, PCSetType() 422 @*/ 423 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 424 { 425 PetscErrorCode ierr; 426 427 PetscFunctionBegin; 428 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 /**********************************************************************/ 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "PCSPAISetCacheSize" 436 /*@ 437 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 438 439 Input Parameters: 440 + pc - the preconditioner 441 - n - cache size {0,1,2,3,4,5} (default 5) 442 443 Notes: SPAI uses a hash table to cache messages and avoid 444 redundant communication. If suggest always using 445 5. This parameter is irrelevant in the serial 446 version. 447 448 Level: intermediate 449 450 .seealso: PCSPAI, PCSetType() 451 @*/ 452 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 453 { 454 PetscErrorCode ierr; 455 456 PetscFunctionBegin; 457 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 /**********************************************************************/ 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "PCSPAISetVerbose" 465 /*@ 466 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 467 468 Input Parameters: 469 + pc - the preconditioner 470 - n - level (default 1) 471 472 Notes: print parameters, timings and matrix statistics 473 474 Level: intermediate 475 476 .seealso: PCSPAI, PCSetType() 477 @*/ 478 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 479 { 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 /**********************************************************************/ 488 489 #undef __FUNCT__ 490 #define __FUNCT__ "PCSPAISetSp" 491 /*@ 492 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 493 494 Input Parameters: 495 + pc - the preconditioner 496 - n - 0 or 1 497 498 Notes: If A has a symmetric nonzero pattern use -sp 1 to 499 improve performance by eliminating some communication 500 in the parallel version. Even if A does not have a 501 symmetric nonzero pattern -sp 1 may well lead to good 502 results, but the code will not follow the published 503 SPAI algorithm exactly. 504 505 506 Level: intermediate 507 508 .seealso: PCSPAI, PCSetType() 509 @*/ 510 PetscErrorCode PCSPAISetSp(PC pc,int sp) 511 { 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 /**********************************************************************/ 520 521 /**********************************************************************/ 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PCSetFromOptions_SPAI" 525 static PetscErrorCode PCSetFromOptions_SPAI(PetscOptions *PetscOptionsObject,PC pc) 526 { 527 PC_SPAI *ispai = (PC_SPAI*)pc->data; 528 PetscErrorCode ierr; 529 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 530 double epsilon1; 531 PetscBool flg; 532 533 PetscFunctionBegin; 534 ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr); 535 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 536 if (flg) { 537 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 538 } 539 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 540 if (flg) { 541 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 542 } 543 /* added 1/7/99 g.h. */ 544 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 545 if (flg) { 546 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 547 } 548 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 549 if (flg) { 550 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 551 } 552 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 553 if (flg) { 554 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 555 } 556 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 557 if (flg) { 558 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 559 } 560 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 561 if (flg) { 562 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 563 } 564 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 565 if (flg) { 566 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 567 } 568 ierr = PetscOptionsTail();CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 /**********************************************************************/ 573 574 /*MC 575 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 576 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 577 578 Options Database Keys: 579 + -pc_spai_epsilon <eps> - set tolerance 580 . -pc_spai_nbstep <n> - set nbsteps 581 . -pc_spai_max <m> - set max 582 . -pc_spai_max_new <m> - set maxnew 583 . -pc_spai_block_size <n> - set block size 584 . -pc_spai_cache_size <n> - set cache size 585 . -pc_spai_sp <m> - set sp 586 - -pc_spai_set_verbose <true,false> - verbose output 587 588 Notes: This only works with AIJ matrices. 589 590 Level: beginner 591 592 Concepts: approximate inverse 593 594 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 595 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 596 PCSPAISetVerbose(), PCSPAISetSp() 597 M*/ 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "PCCreate_SPAI" 601 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 602 { 603 PC_SPAI *ispai; 604 PetscErrorCode ierr; 605 606 PetscFunctionBegin; 607 ierr = PetscNewLog(pc,&ispai);CHKERRQ(ierr); 608 pc->data = ispai; 609 610 pc->ops->destroy = PCDestroy_SPAI; 611 pc->ops->apply = PCApply_SPAI; 612 pc->ops->applyrichardson = 0; 613 pc->ops->setup = PCSetUp_SPAI; 614 pc->ops->view = PCView_SPAI; 615 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 616 617 ispai->epsilon = .4; 618 ispai->nbsteps = 5; 619 ispai->max = 5000; 620 ispai->maxnew = 5; 621 ispai->block_size = 1; 622 ispai->cache_size = 5; 623 ispai->verbose = 0; 624 625 ispai->sp = 1; 626 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr); 627 628 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 629 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 630 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr); 631 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 632 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 633 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 634 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 635 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 /**********************************************************************/ 640 641 /* 642 Converts from a PETSc matrix to an SPAI matrix 643 */ 644 #undef __FUNCT__ 645 #define __FUNCT__ "ConvertMatToMatrix" 646 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 647 { 648 matrix *M; 649 int i,j,col; 650 int row_indx; 651 int len,pe,local_indx,start_indx; 652 int *mapping; 653 PetscErrorCode ierr; 654 const int *cols; 655 const double *vals; 656 int n,mnl,nnl,nz,rstart,rend; 657 PetscMPIInt size,rank; 658 struct compressed_lines *rows; 659 660 PetscFunctionBegin; 661 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 662 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 663 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 664 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 665 666 /* 667 not sure why a barrier is required. commenting out 668 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 669 */ 670 671 M = new_matrix((SPAI_Comm)comm); 672 673 M->n = n; 674 M->bs = 1; 675 M->max_block_size = 1; 676 677 M->mnls = (int*)malloc(sizeof(int)*size); 678 M->start_indices = (int*)malloc(sizeof(int)*size); 679 M->pe = (int*)malloc(sizeof(int)*n); 680 M->block_sizes = (int*)malloc(sizeof(int)*n); 681 for (i=0; i<n; i++) M->block_sizes[i] = 1; 682 683 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 684 685 M->start_indices[0] = 0; 686 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 687 688 M->mnl = M->mnls[M->myid]; 689 M->my_start_index = M->start_indices[M->myid]; 690 691 for (i=0; i<size; i++) { 692 start_indx = M->start_indices[i]; 693 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 694 } 695 696 if (AT) { 697 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 698 } else { 699 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 700 } 701 702 rows = M->lines; 703 704 /* Determine the mapping from global indices to pointers */ 705 ierr = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr); 706 pe = 0; 707 local_indx = 0; 708 for (i=0; i<M->n; i++) { 709 if (local_indx >= M->mnls[pe]) { 710 pe++; 711 local_indx = 0; 712 } 713 mapping[i] = local_indx + M->start_indices[pe]; 714 local_indx++; 715 } 716 717 /*********************************************************/ 718 /************** Set up the row structure *****************/ 719 /*********************************************************/ 720 721 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 722 for (i=rstart; i<rend; i++) { 723 row_indx = i - rstart; 724 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 725 /* allocate buffers */ 726 rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 727 rows->A[row_indx] = (double*)malloc(nz*sizeof(double)); 728 /* copy the matrix */ 729 for (j=0; j<nz; j++) { 730 col = cols[j]; 731 len = rows->len[row_indx]++; 732 733 rows->ptrs[row_indx][len] = mapping[col]; 734 rows->A[row_indx][len] = vals[j]; 735 } 736 rows->slen[row_indx] = rows->len[row_indx]; 737 738 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 739 } 740 741 742 /************************************************************/ 743 /************** Set up the column structure *****************/ 744 /*********************************************************/ 745 746 if (AT) { 747 748 for (i=rstart; i<rend; i++) { 749 row_indx = i - rstart; 750 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 751 /* allocate buffers */ 752 rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 753 /* copy the matrix (i.e., the structure) */ 754 for (j=0; j<nz; j++) { 755 col = cols[j]; 756 len = rows->rlen[row_indx]++; 757 758 rows->rptrs[row_indx][len] = mapping[col]; 759 } 760 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 761 } 762 } 763 764 ierr = PetscFree(mapping);CHKERRQ(ierr); 765 766 order_pointers(M); 767 M->maxnz = calc_maxnz(M); 768 *B = M; 769 PetscFunctionReturn(0); 770 } 771 772 /**********************************************************************/ 773 774 /* 775 Converts from an SPAI matrix B to a PETSc matrix PB. 776 This assumes that the SPAI matrix B is stored in 777 COMPRESSED-ROW format. 778 */ 779 #undef __FUNCT__ 780 #define __FUNCT__ "ConvertMatrixToMat" 781 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 782 { 783 PetscMPIInt size,rank; 784 PetscErrorCode ierr; 785 int m,n,M,N; 786 int d_nz,o_nz; 787 int *d_nnz,*o_nnz; 788 int i,k,global_row,global_col,first_diag_col,last_diag_col; 789 PetscScalar val; 790 791 PetscFunctionBegin; 792 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 793 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 794 795 m = n = B->mnls[rank]; 796 d_nz = o_nz = 0; 797 798 /* Determine preallocation for MatCreateMPIAIJ */ 799 ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr); 800 ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr); 801 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 802 first_diag_col = B->start_indices[rank]; 803 last_diag_col = first_diag_col + B->mnls[rank]; 804 for (i=0; i<B->mnls[rank]; i++) { 805 for (k=0; k<B->lines->len[i]; k++) { 806 global_col = B->lines->ptrs[i][k]; 807 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 808 else o_nnz[i]++; 809 } 810 } 811 812 M = N = B->n; 813 /* Here we only know how to create AIJ format */ 814 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 815 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 816 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 817 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 818 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 819 820 for (i=0; i<B->mnls[rank]; i++) { 821 global_row = B->start_indices[rank]+i; 822 for (k=0; k<B->lines->len[i]; k++) { 823 global_col = B->lines->ptrs[i][k]; 824 825 val = B->lines->A[i][k]; 826 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 827 } 828 } 829 830 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 831 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 832 833 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 /**********************************************************************/ 839 840 /* 841 Converts from an SPAI vector v to a PETSc vec Pv. 842 */ 843 #undef __FUNCT__ 844 #define __FUNCT__ "ConvertVectorToVec" 845 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 846 { 847 PetscErrorCode ierr; 848 PetscMPIInt size,rank; 849 int m,M,i,*mnls,*start_indices,*global_indices; 850 851 PetscFunctionBegin; 852 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 853 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 854 855 m = v->mnl; 856 M = v->n; 857 858 859 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 860 861 ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr); 862 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 863 864 ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr); 865 866 start_indices[0] = 0; 867 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 868 869 ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr); 870 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 871 872 ierr = PetscFree(mnls);CHKERRQ(ierr); 873 ierr = PetscFree(start_indices);CHKERRQ(ierr); 874 875 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 876 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 877 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 878 879 ierr = PetscFree(global_indices);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 885 886