1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 21 #include <private/pcimpl.h> /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 29 #include <spai.h> 30 #include <matrix.h> 31 EXTERN_C_END 32 33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 36 extern PetscErrorCode MM_to_PETSC(char *,char *,char *); 37 38 typedef struct { 39 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. */ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 /**********************************************************************/ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCSetUp_SPAI" 62 static PetscErrorCode PCSetUp_SPAI(PC pc) 63 { 64 PC_SPAI *ispai = (PC_SPAI*)pc->data; 65 PetscErrorCode ierr; 66 Mat AT; 67 68 PetscFunctionBegin; 69 70 init_SPAI(); 71 72 if (ispai->sp) { 73 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 74 } else { 75 /* Use the transpose to get the column nonzero structure. */ 76 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 77 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 78 ierr = MatDestroy(&AT);CHKERRQ(ierr); 79 } 80 81 /* Destroy the transpose */ 82 /* Don't know how to do it. PETSc developers? */ 83 84 /* construct SPAI preconditioner */ 85 /* FILE *messages */ /* file for warning messages */ 86 /* double epsilon */ /* tolerance */ 87 /* int nbsteps */ /* max number of "improvement" steps per line */ 88 /* int max */ /* max dimensions of is_I, q, etc. */ 89 /* int maxnew */ /* max number of new entries per step */ 90 /* int block_size */ /* block_size == 1 specifies scalar elments 91 block_size == n specifies nxn constant-block elements 92 block_size == 0 specifies variable-block elements */ 93 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 94 /* cache_size == 0 indicates no caching */ 95 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 96 verbose == 1 prints timing and matrix statistics */ 97 98 ierr = bspai(ispai->B,&ispai->M, 99 stdout, 100 ispai->epsilon, 101 ispai->nbsteps, 102 ispai->max, 103 ispai->maxnew, 104 ispai->block_size, 105 ispai->cache_size, 106 ispai->verbose);CHKERRQ(ierr); 107 108 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 109 110 /* free the SPAI matrices */ 111 sp_free_matrix(ispai->B); 112 sp_free_matrix(ispai->M); 113 114 PetscFunctionReturn(0); 115 } 116 117 /**********************************************************************/ 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCApply_SPAI" 121 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 122 { 123 PC_SPAI *ispai = (PC_SPAI*)pc->data; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 /* Now using PETSc's multiply */ 128 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /**********************************************************************/ 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "PCDestroy_SPAI" 136 static PetscErrorCode PCDestroy_SPAI(PC pc) 137 { 138 PetscErrorCode ierr; 139 PC_SPAI *ispai = (PC_SPAI*)pc->data; 140 141 PetscFunctionBegin; 142 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 143 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 144 ierr = PetscFree(pc->data);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 /**********************************************************************/ 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "PCView_SPAI" 152 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 153 { 154 PC_SPAI *ispai = (PC_SPAI*)pc->data; 155 PetscErrorCode ierr; 156 PetscBool iascii; 157 158 PetscFunctionBegin; 159 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 160 if (iascii) { 161 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 169 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 EXTERN_C_BEGIN 175 #undef __FUNCT__ 176 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 177 PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 178 { 179 PC_SPAI *ispai = (PC_SPAI*)pc->data; 180 PetscFunctionBegin; 181 ispai->epsilon = epsilon1; 182 PetscFunctionReturn(0); 183 } 184 EXTERN_C_END 185 186 /**********************************************************************/ 187 188 EXTERN_C_BEGIN 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 191 PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 192 { 193 PC_SPAI *ispai = (PC_SPAI*)pc->data; 194 PetscFunctionBegin; 195 ispai->nbsteps = nbsteps1; 196 PetscFunctionReturn(0); 197 } 198 EXTERN_C_END 199 200 /**********************************************************************/ 201 202 /* added 1/7/99 g.h. */ 203 EXTERN_C_BEGIN 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCSPAISetMax_SPAI" 206 PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 207 { 208 PC_SPAI *ispai = (PC_SPAI*)pc->data; 209 PetscFunctionBegin; 210 ispai->max = max1; 211 PetscFunctionReturn(0); 212 } 213 EXTERN_C_END 214 215 /**********************************************************************/ 216 217 EXTERN_C_BEGIN 218 #undef __FUNCT__ 219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 220 PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 221 { 222 PC_SPAI *ispai = (PC_SPAI*)pc->data; 223 PetscFunctionBegin; 224 ispai->maxnew = maxnew1; 225 PetscFunctionReturn(0); 226 } 227 EXTERN_C_END 228 229 /**********************************************************************/ 230 231 EXTERN_C_BEGIN 232 #undef __FUNCT__ 233 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 234 PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 235 { 236 PC_SPAI *ispai = (PC_SPAI*)pc->data; 237 PetscFunctionBegin; 238 ispai->block_size = block_size1; 239 PetscFunctionReturn(0); 240 } 241 EXTERN_C_END 242 243 /**********************************************************************/ 244 245 EXTERN_C_BEGIN 246 #undef __FUNCT__ 247 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 248 PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 249 { 250 PC_SPAI *ispai = (PC_SPAI*)pc->data; 251 PetscFunctionBegin; 252 ispai->cache_size = cache_size; 253 PetscFunctionReturn(0); 254 } 255 EXTERN_C_END 256 257 /**********************************************************************/ 258 259 EXTERN_C_BEGIN 260 #undef __FUNCT__ 261 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 262 PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 263 { 264 PC_SPAI *ispai = (PC_SPAI*)pc->data; 265 PetscFunctionBegin; 266 ispai->verbose = verbose; 267 PetscFunctionReturn(0); 268 } 269 EXTERN_C_END 270 271 /**********************************************************************/ 272 273 EXTERN_C_BEGIN 274 #undef __FUNCT__ 275 #define __FUNCT__ "PCSPAISetSp_SPAI" 276 PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 277 { 278 PC_SPAI *ispai = (PC_SPAI*)pc->data; 279 PetscFunctionBegin; 280 ispai->sp = sp; 281 PetscFunctionReturn(0); 282 } 283 EXTERN_C_END 284 285 /* -------------------------------------------------------------------*/ 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "PCSPAISetEpsilon" 289 /*@ 290 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 291 292 Input Parameters: 293 + pc - the preconditioner 294 - eps - epsilon (default .4) 295 296 Notes: Espilon must be between 0 and 1. It controls the 297 quality of the approximation of M to the inverse of 298 A. Higher values of epsilon lead to more work, more 299 fill, and usually better preconditioners. In many 300 cases the best choice of epsilon is the one that 301 divides the total solution time equally between the 302 preconditioner and the solver. 303 304 Level: intermediate 305 306 .seealso: PCSPAI, PCSetType() 307 @*/ 308 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 309 { 310 PetscErrorCode ierr; 311 PetscFunctionBegin; 312 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 /**********************************************************************/ 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "PCSPAISetNBSteps" 320 /*@ 321 PCSPAISetNBSteps - set maximum number of improvement steps per row in 322 the SPAI preconditioner 323 324 Input Parameters: 325 + pc - the preconditioner 326 - n - number of steps (default 5) 327 328 Notes: SPAI constructs to approximation to every column of 329 the exact inverse of A in a series of improvement 330 steps. The quality of the approximation is determined 331 by epsilon. If an approximation achieving an accuracy 332 of epsilon is not obtained after ns steps, SPAI simply 333 uses the best approximation constructed so far. 334 335 Level: intermediate 336 337 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 338 @*/ 339 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 340 { 341 PetscErrorCode ierr; 342 PetscFunctionBegin; 343 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 /**********************************************************************/ 348 349 /* added 1/7/99 g.h. */ 350 #undef __FUNCT__ 351 #define __FUNCT__ "PCSPAISetMax" 352 /*@ 353 PCSPAISetMax - set the size of various working buffers in 354 the SPAI preconditioner 355 356 Input Parameters: 357 + pc - the preconditioner 358 - n - size (default is 5000) 359 360 Level: intermediate 361 362 .seealso: PCSPAI, PCSetType() 363 @*/ 364 PetscErrorCode PCSPAISetMax(PC pc,int max1) 365 { 366 PetscErrorCode ierr; 367 PetscFunctionBegin; 368 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 372 /**********************************************************************/ 373 374 #undef __FUNCT__ 375 #define __FUNCT__ "PCSPAISetMaxNew" 376 /*@ 377 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 378 in SPAI preconditioner 379 380 Input Parameters: 381 + pc - the preconditioner 382 - n - maximum number (default 5) 383 384 Level: intermediate 385 386 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 387 @*/ 388 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 389 { 390 PetscErrorCode ierr; 391 PetscFunctionBegin; 392 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 /**********************************************************************/ 397 398 #undef __FUNCT__ 399 #define __FUNCT__ "PCSPAISetBlockSize" 400 /*@ 401 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 402 403 Input Parameters: 404 + pc - the preconditioner 405 - n - block size (default 1) 406 407 Notes: A block 408 size of 1 treats A as a matrix of scalar elements. A 409 block size of s > 1 treats A as a matrix of sxs 410 blocks. A block size of 0 treats A as a matrix with 411 variable sized blocks, which are determined by 412 searching for dense square diagonal blocks in A. 413 This can be very effective for finite-element 414 matrices. 415 416 SPAI will convert A to block form, use a block 417 version of the preconditioner algorithm, and then 418 convert the result back to scalar form. 419 420 In many cases the a block-size parameter other than 1 421 can lead to very significant improvement in 422 performance. 423 424 425 Level: intermediate 426 427 .seealso: PCSPAI, PCSetType() 428 @*/ 429 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 430 { 431 PetscErrorCode ierr; 432 PetscFunctionBegin; 433 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 /**********************************************************************/ 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "PCSPAISetCacheSize" 441 /*@ 442 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 443 444 Input Parameters: 445 + pc - the preconditioner 446 - n - cache size {0,1,2,3,4,5} (default 5) 447 448 Notes: SPAI uses a hash table to cache messages and avoid 449 redundant communication. If suggest always using 450 5. This parameter is irrelevant in the serial 451 version. 452 453 Level: intermediate 454 455 .seealso: PCSPAI, PCSetType() 456 @*/ 457 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 458 { 459 PetscErrorCode ierr; 460 PetscFunctionBegin; 461 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 /**********************************************************************/ 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "PCSPAISetVerbose" 469 /*@ 470 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 471 472 Input Parameters: 473 + pc - the preconditioner 474 - n - level (default 1) 475 476 Notes: print parameters, timings and matrix statistics 477 478 Level: intermediate 479 480 .seealso: PCSPAI, PCSetType() 481 @*/ 482 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 483 { 484 PetscErrorCode ierr; 485 PetscFunctionBegin; 486 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 487 PetscFunctionReturn(0); 488 } 489 490 /**********************************************************************/ 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "PCSPAISetSp" 494 /*@ 495 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 496 497 Input Parameters: 498 + pc - the preconditioner 499 - n - 0 or 1 500 501 Notes: If A has a symmetric nonzero pattern use -sp 1 to 502 improve performance by eliminating some communication 503 in the parallel version. Even if A does not have a 504 symmetric nonzero pattern -sp 1 may well lead to good 505 results, but the code will not follow the published 506 SPAI algorithm exactly. 507 508 509 Level: intermediate 510 511 .seealso: PCSPAI, PCSetType() 512 @*/ 513 PetscErrorCode PCSPAISetSp(PC pc,int sp) 514 { 515 PetscErrorCode ierr; 516 PetscFunctionBegin; 517 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 /**********************************************************************/ 522 523 /**********************************************************************/ 524 525 #undef __FUNCT__ 526 #define __FUNCT__ "PCSetFromOptions_SPAI" 527 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 528 { 529 PC_SPAI *ispai = (PC_SPAI*)pc->data; 530 PetscErrorCode ierr; 531 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 532 double epsilon1; 533 PetscBool flg; 534 535 PetscFunctionBegin; 536 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 537 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 538 if (flg) { 539 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 540 } 541 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 542 if (flg) { 543 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 544 } 545 /* added 1/7/99 g.h. */ 546 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 547 if (flg) { 548 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 549 } 550 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 551 if (flg) { 552 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 553 } 554 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 555 if (flg) { 556 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 557 } 558 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 559 if (flg) { 560 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 561 } 562 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 563 if (flg) { 564 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 565 } 566 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 567 if (flg) { 568 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 569 } 570 ierr = PetscOptionsTail();CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 /**********************************************************************/ 575 576 /*MC 577 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 578 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 579 580 Options Database Keys: 581 + -pc_spai_epsilon <eps> - set tolerance 582 . -pc_spai_nbstep <n> - set nbsteps 583 . -pc_spai_max <m> - set max 584 . -pc_spai_max_new <m> - set maxnew 585 . -pc_spai_block_size <n> - set block size 586 . -pc_spai_cache_size <n> - set cache size 587 . -pc_spai_sp <m> - set sp 588 - -pc_spai_set_verbose <true,false> - verbose output 589 590 Notes: This only works with AIJ matrices. 591 592 Level: beginner 593 594 Concepts: approximate inverse 595 596 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 597 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 598 PCSPAISetVerbose(), PCSPAISetSp() 599 M*/ 600 601 EXTERN_C_BEGIN 602 #undef __FUNCT__ 603 #define __FUNCT__ "PCCreate_SPAI" 604 PetscErrorCode PCCreate_SPAI(PC pc) 605 { 606 PC_SPAI *ispai; 607 PetscErrorCode ierr; 608 609 PetscFunctionBegin; 610 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 611 pc->data = ispai; 612 613 pc->ops->destroy = PCDestroy_SPAI; 614 pc->ops->apply = PCApply_SPAI; 615 pc->ops->applyrichardson = 0; 616 pc->ops->setup = PCSetUp_SPAI; 617 pc->ops->view = PCView_SPAI; 618 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 619 620 ispai->epsilon = .4; 621 ispai->nbsteps = 5; 622 ispai->max = 5000; 623 ispai->maxnew = 5; 624 ispai->block_size = 1; 625 ispai->cache_size = 5; 626 ispai->verbose = 0; 627 628 ispai->sp = 1; 629 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 630 631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 632 "PCSPAISetEpsilon_SPAI", 633 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 635 "PCSPAISetNBSteps_SPAI", 636 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 638 "PCSPAISetMax_SPAI", 639 PCSPAISetMax_SPAI);CHKERRQ(ierr); 640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 641 "PCSPAISetMaxNew_SPAI", 642 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 643 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 644 "PCSPAISetBlockSize_SPAI", 645 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 647 "PCSPAISetCacheSize_SPAI", 648 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 649 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 650 "PCSPAISetVerbose_SPAI", 651 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 652 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 653 "PCSPAISetSp_SPAI", 654 PCSPAISetSp_SPAI);CHKERRQ(ierr); 655 656 PetscFunctionReturn(0); 657 } 658 EXTERN_C_END 659 660 /**********************************************************************/ 661 662 /* 663 Converts from a PETSc matrix to an SPAI matrix 664 */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "ConvertMatToMatrix" 667 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 668 { 669 matrix *M; 670 int i,j,col; 671 int row_indx; 672 int len,pe,local_indx,start_indx; 673 int *mapping; 674 PetscErrorCode ierr; 675 const int *cols; 676 const double *vals; 677 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 678 PetscMPIInt size,rank; 679 struct compressed_lines *rows; 680 681 PetscFunctionBegin; 682 683 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 684 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 685 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 686 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 687 688 /* 689 not sure why a barrier is required. commenting out 690 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 691 */ 692 693 M = new_matrix((SPAI_Comm)comm); 694 695 M->n = n; 696 M->bs = 1; 697 M->max_block_size = 1; 698 699 M->mnls = (int*)malloc(sizeof(int)*size); 700 M->start_indices = (int*)malloc(sizeof(int)*size); 701 M->pe = (int*)malloc(sizeof(int)*n); 702 M->block_sizes = (int*)malloc(sizeof(int)*n); 703 for (i=0; i<n; i++) M->block_sizes[i] = 1; 704 705 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 706 707 M->start_indices[0] = 0; 708 for (i=1; i<size; i++) { 709 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 710 } 711 712 M->mnl = M->mnls[M->myid]; 713 M->my_start_index = M->start_indices[M->myid]; 714 715 for (i=0; i<size; i++) { 716 start_indx = M->start_indices[i]; 717 for (j=0; j<M->mnls[i]; j++) 718 M->pe[start_indx+j] = i; 719 } 720 721 if (AT) { 722 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 723 } else { 724 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 725 } 726 727 rows = M->lines; 728 729 /* Determine the mapping from global indices to pointers */ 730 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 731 pe = 0; 732 local_indx = 0; 733 for (i=0; i<M->n; i++) { 734 if (local_indx >= M->mnls[pe]) { 735 pe++; 736 local_indx = 0; 737 } 738 mapping[i] = local_indx + M->start_indices[pe]; 739 local_indx++; 740 } 741 742 743 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 744 745 /*********************************************************/ 746 /************** Set up the row structure *****************/ 747 /*********************************************************/ 748 749 /* count number of nonzeros in every row */ 750 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 751 for (i=rstart; i<rend; i++) { 752 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 753 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 754 } 755 756 /* allocate buffers */ 757 len = 0; 758 for (i=0; i<mnl; i++) { 759 if (len < num_ptr[i]) len = num_ptr[i]; 760 } 761 762 for (i=rstart; i<rend; i++) { 763 row_indx = i-rstart; 764 len = num_ptr[row_indx]; 765 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 766 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 767 } 768 769 /* copy the matrix */ 770 for (i=rstart; i<rend; i++) { 771 row_indx = i - rstart; 772 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 773 for (j=0; j<nz; j++) { 774 col = cols[j]; 775 len = rows->len[row_indx]++; 776 rows->ptrs[row_indx][len] = mapping[col]; 777 rows->A[row_indx][len] = vals[j]; 778 } 779 rows->slen[row_indx] = rows->len[row_indx]; 780 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 781 } 782 783 784 /************************************************************/ 785 /************** Set up the column structure *****************/ 786 /*********************************************************/ 787 788 if (AT) { 789 790 /* count number of nonzeros in every column */ 791 for (i=rstart; i<rend; i++) { 792 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 793 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 794 } 795 796 /* allocate buffers */ 797 len = 0; 798 for (i=0; i<mnl; i++) { 799 if (len < num_ptr[i]) len = num_ptr[i]; 800 } 801 802 for (i=rstart; i<rend; i++) { 803 row_indx = i-rstart; 804 len = num_ptr[row_indx]; 805 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 806 } 807 808 /* copy the matrix (i.e., the structure) */ 809 for (i=rstart; i<rend; i++) { 810 row_indx = i - rstart; 811 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 812 for (j=0; j<nz; j++) { 813 col = cols[j]; 814 len = rows->rlen[row_indx]++; 815 rows->rptrs[row_indx][len] = mapping[col]; 816 } 817 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 818 } 819 } 820 821 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 822 ierr = PetscFree(mapping);CHKERRQ(ierr); 823 824 order_pointers(M); 825 M->maxnz = calc_maxnz(M); 826 827 *B = M; 828 829 PetscFunctionReturn(0); 830 } 831 832 /**********************************************************************/ 833 834 /* 835 Converts from an SPAI matrix B to a PETSc matrix PB. 836 This assumes that the the SPAI matrix B is stored in 837 COMPRESSED-ROW format. 838 */ 839 #undef __FUNCT__ 840 #define __FUNCT__ "ConvertMatrixToMat" 841 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 842 { 843 PetscMPIInt size,rank; 844 PetscErrorCode ierr; 845 int m,n,M,N; 846 int d_nz,o_nz; 847 int *d_nnz,*o_nnz; 848 int i,k,global_row,global_col,first_diag_col,last_diag_col; 849 PetscScalar val; 850 851 PetscFunctionBegin; 852 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 853 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 854 855 m = n = B->mnls[rank]; 856 d_nz = o_nz = 0; 857 858 /* Determine preallocation for MatCreateMPIAIJ */ 859 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 860 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 861 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 862 first_diag_col = B->start_indices[rank]; 863 last_diag_col = first_diag_col + B->mnls[rank]; 864 for (i=0; i<B->mnls[rank]; i++) { 865 for (k=0; k<B->lines->len[i]; k++) { 866 global_col = B->lines->ptrs[i][k]; 867 if ((global_col >= first_diag_col) && (global_col <= last_diag_col)) 868 d_nnz[i]++; 869 else 870 o_nnz[i]++; 871 } 872 } 873 874 M = N = B->n; 875 /* Here we only know how to create AIJ format */ 876 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 877 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 878 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 879 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 880 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 881 882 for (i=0; i<B->mnls[rank]; i++) { 883 global_row = B->start_indices[rank]+i; 884 for (k=0; k<B->lines->len[i]; k++) { 885 global_col = B->lines->ptrs[i][k]; 886 val = B->lines->A[i][k]; 887 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 888 } 889 } 890 891 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 892 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 893 894 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 897 PetscFunctionReturn(0); 898 } 899 900 /**********************************************************************/ 901 902 /* 903 Converts from an SPAI vector v to a PETSc vec Pv. 904 */ 905 #undef __FUNCT__ 906 #define __FUNCT__ "ConvertVectorToVec" 907 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 908 { 909 PetscErrorCode ierr; 910 PetscMPIInt size,rank; 911 int m,M,i,*mnls,*start_indices,*global_indices; 912 913 PetscFunctionBegin; 914 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 915 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 916 917 m = v->mnl; 918 M = v->n; 919 920 921 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 922 923 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 924 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 925 926 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 927 start_indices[0] = 0; 928 for (i=1; i<size; i++) 929 start_indices[i] = start_indices[i-1] +mnls[i-1]; 930 931 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 932 for (i=0; i<v->mnl; i++) 933 global_indices[i] = start_indices[rank] + i; 934 935 ierr = PetscFree(mnls);CHKERRQ(ierr); 936 ierr = PetscFree(start_indices);CHKERRQ(ierr); 937 938 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 939 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 940 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 941 942 ierr = PetscFree(global_indices);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 947 948 949