1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 21 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 29 #include <spai.h> 30 #include <matrix.h> 31 EXTERN_C_END 32 33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *); 35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 36 extern PetscErrorCode MM_to_PETSC(char *,char *,char *); 37 38 typedef struct { 39 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. */ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 /**********************************************************************/ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCSetUp_SPAI" 62 static PetscErrorCode PCSetUp_SPAI(PC pc) 63 { 64 PC_SPAI *ispai = (PC_SPAI*)pc->data; 65 PetscErrorCode ierr; 66 Mat AT; 67 68 PetscFunctionBegin; 69 init_SPAI(); 70 71 if (ispai->sp) { 72 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 73 } else { 74 /* Use the transpose to get the column nonzero structure. */ 75 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 76 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 77 ierr = MatDestroy(&AT);CHKERRQ(ierr); 78 } 79 80 /* Destroy the transpose */ 81 /* Don't know how to do it. PETSc developers? */ 82 83 /* construct SPAI preconditioner */ 84 /* FILE *messages */ /* file for warning messages */ 85 /* double epsilon */ /* tolerance */ 86 /* int nbsteps */ /* max number of "improvement" steps per line */ 87 /* int max */ /* max dimensions of is_I, q, etc. */ 88 /* int maxnew */ /* max number of new entries per step */ 89 /* int block_size */ /* block_size == 1 specifies scalar elments 90 block_size == n specifies nxn constant-block elements 91 block_size == 0 specifies variable-block elements */ 92 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */ 93 /* cache_size == 0 indicates no caching */ 94 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 95 verbose == 1 prints timing and matrix statistics */ 96 97 ierr = bspai(ispai->B,&ispai->M, 98 stdout, 99 ispai->epsilon, 100 ispai->nbsteps, 101 ispai->max, 102 ispai->maxnew, 103 ispai->block_size, 104 ispai->cache_size, 105 ispai->verbose);CHKERRQ(ierr); 106 107 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 108 109 /* free the SPAI matrices */ 110 sp_free_matrix(ispai->B); 111 sp_free_matrix(ispai->M); 112 PetscFunctionReturn(0); 113 } 114 115 /**********************************************************************/ 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCApply_SPAI" 119 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 120 { 121 PC_SPAI *ispai = (PC_SPAI*)pc->data; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 /* Now using PETSc's multiply */ 126 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 127 PetscFunctionReturn(0); 128 } 129 130 /**********************************************************************/ 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "PCDestroy_SPAI" 134 static PetscErrorCode PCDestroy_SPAI(PC pc) 135 { 136 PetscErrorCode ierr; 137 PC_SPAI *ispai = (PC_SPAI*)pc->data; 138 139 PetscFunctionBegin; 140 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 141 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 142 ierr = PetscFree(pc->data);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 /**********************************************************************/ 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "PCView_SPAI" 150 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 151 { 152 PC_SPAI *ispai = (PC_SPAI*)pc->data; 153 PetscErrorCode ierr; 154 PetscBool iascii; 155 156 PetscFunctionBegin; 157 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 158 if (iascii) { 159 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 160 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 EXTERN_C_BEGIN 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 175 PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 176 { 177 PC_SPAI *ispai = (PC_SPAI*)pc->data; 178 179 PetscFunctionBegin; 180 ispai->epsilon = epsilon1; 181 PetscFunctionReturn(0); 182 } 183 EXTERN_C_END 184 185 /**********************************************************************/ 186 187 EXTERN_C_BEGIN 188 #undef __FUNCT__ 189 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 190 PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 191 { 192 PC_SPAI *ispai = (PC_SPAI*)pc->data; 193 194 PetscFunctionBegin; 195 ispai->nbsteps = nbsteps1; 196 PetscFunctionReturn(0); 197 } 198 EXTERN_C_END 199 200 /**********************************************************************/ 201 202 /* added 1/7/99 g.h. */ 203 EXTERN_C_BEGIN 204 #undef __FUNCT__ 205 #define __FUNCT__ "PCSPAISetMax_SPAI" 206 PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 207 { 208 PC_SPAI *ispai = (PC_SPAI*)pc->data; 209 210 PetscFunctionBegin; 211 ispai->max = max1; 212 PetscFunctionReturn(0); 213 } 214 EXTERN_C_END 215 216 /**********************************************************************/ 217 218 EXTERN_C_BEGIN 219 #undef __FUNCT__ 220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 221 PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 222 { 223 PC_SPAI *ispai = (PC_SPAI*)pc->data; 224 225 PetscFunctionBegin; 226 ispai->maxnew = maxnew1; 227 PetscFunctionReturn(0); 228 } 229 EXTERN_C_END 230 231 /**********************************************************************/ 232 233 EXTERN_C_BEGIN 234 #undef __FUNCT__ 235 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 236 PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 237 { 238 PC_SPAI *ispai = (PC_SPAI*)pc->data; 239 240 PetscFunctionBegin; 241 ispai->block_size = block_size1; 242 PetscFunctionReturn(0); 243 } 244 EXTERN_C_END 245 246 /**********************************************************************/ 247 248 EXTERN_C_BEGIN 249 #undef __FUNCT__ 250 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 251 PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 252 { 253 PC_SPAI *ispai = (PC_SPAI*)pc->data; 254 255 PetscFunctionBegin; 256 ispai->cache_size = cache_size; 257 PetscFunctionReturn(0); 258 } 259 EXTERN_C_END 260 261 /**********************************************************************/ 262 263 EXTERN_C_BEGIN 264 #undef __FUNCT__ 265 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 266 PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 267 { 268 PC_SPAI *ispai = (PC_SPAI*)pc->data; 269 270 PetscFunctionBegin; 271 ispai->verbose = verbose; 272 PetscFunctionReturn(0); 273 } 274 EXTERN_C_END 275 276 /**********************************************************************/ 277 278 EXTERN_C_BEGIN 279 #undef __FUNCT__ 280 #define __FUNCT__ "PCSPAISetSp_SPAI" 281 PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 282 { 283 PC_SPAI *ispai = (PC_SPAI*)pc->data; 284 285 PetscFunctionBegin; 286 ispai->sp = sp; 287 PetscFunctionReturn(0); 288 } 289 EXTERN_C_END 290 291 /* -------------------------------------------------------------------*/ 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "PCSPAISetEpsilon" 295 /*@ 296 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 297 298 Input Parameters: 299 + pc - the preconditioner 300 - eps - epsilon (default .4) 301 302 Notes: Espilon must be between 0 and 1. It controls the 303 quality of the approximation of M to the inverse of 304 A. Higher values of epsilon lead to more work, more 305 fill, and usually better preconditioners. In many 306 cases the best choice of epsilon is the one that 307 divides the total solution time equally between the 308 preconditioner and the solver. 309 310 Level: intermediate 311 312 .seealso: PCSPAI, PCSetType() 313 @*/ 314 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 315 { 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 /**********************************************************************/ 324 325 #undef __FUNCT__ 326 #define __FUNCT__ "PCSPAISetNBSteps" 327 /*@ 328 PCSPAISetNBSteps - set maximum number of improvement steps per row in 329 the SPAI preconditioner 330 331 Input Parameters: 332 + pc - the preconditioner 333 - n - number of steps (default 5) 334 335 Notes: SPAI constructs to approximation to every column of 336 the exact inverse of A in a series of improvement 337 steps. The quality of the approximation is determined 338 by epsilon. If an approximation achieving an accuracy 339 of epsilon is not obtained after ns steps, SPAI simply 340 uses the best approximation constructed so far. 341 342 Level: intermediate 343 344 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 345 @*/ 346 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 347 { 348 PetscErrorCode ierr; 349 350 PetscFunctionBegin; 351 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 /**********************************************************************/ 356 357 /* added 1/7/99 g.h. */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "PCSPAISetMax" 360 /*@ 361 PCSPAISetMax - set the size of various working buffers in 362 the SPAI preconditioner 363 364 Input Parameters: 365 + pc - the preconditioner 366 - n - size (default is 5000) 367 368 Level: intermediate 369 370 .seealso: PCSPAI, PCSetType() 371 @*/ 372 PetscErrorCode PCSPAISetMax(PC pc,int max1) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 /**********************************************************************/ 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "PCSPAISetMaxNew" 385 /*@ 386 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 387 in SPAI preconditioner 388 389 Input Parameters: 390 + pc - the preconditioner 391 - n - maximum number (default 5) 392 393 Level: intermediate 394 395 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 396 @*/ 397 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 398 { 399 PetscErrorCode ierr; 400 401 PetscFunctionBegin; 402 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 403 PetscFunctionReturn(0); 404 } 405 406 /**********************************************************************/ 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "PCSPAISetBlockSize" 410 /*@ 411 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 412 413 Input Parameters: 414 + pc - the preconditioner 415 - n - block size (default 1) 416 417 Notes: A block 418 size of 1 treats A as a matrix of scalar elements. A 419 block size of s > 1 treats A as a matrix of sxs 420 blocks. A block size of 0 treats A as a matrix with 421 variable sized blocks, which are determined by 422 searching for dense square diagonal blocks in A. 423 This can be very effective for finite-element 424 matrices. 425 426 SPAI will convert A to block form, use a block 427 version of the preconditioner algorithm, and then 428 convert the result back to scalar form. 429 430 In many cases the a block-size parameter other than 1 431 can lead to very significant improvement in 432 performance. 433 434 435 Level: intermediate 436 437 .seealso: PCSPAI, PCSetType() 438 @*/ 439 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 440 { 441 PetscErrorCode ierr; 442 443 PetscFunctionBegin; 444 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 /**********************************************************************/ 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "PCSPAISetCacheSize" 452 /*@ 453 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 454 455 Input Parameters: 456 + pc - the preconditioner 457 - n - cache size {0,1,2,3,4,5} (default 5) 458 459 Notes: SPAI uses a hash table to cache messages and avoid 460 redundant communication. If suggest always using 461 5. This parameter is irrelevant in the serial 462 version. 463 464 Level: intermediate 465 466 .seealso: PCSPAI, PCSetType() 467 @*/ 468 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 469 { 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 477 /**********************************************************************/ 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "PCSPAISetVerbose" 481 /*@ 482 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 483 484 Input Parameters: 485 + pc - the preconditioner 486 - n - level (default 1) 487 488 Notes: print parameters, timings and matrix statistics 489 490 Level: intermediate 491 492 .seealso: PCSPAI, PCSetType() 493 @*/ 494 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 495 { 496 PetscErrorCode ierr; 497 498 PetscFunctionBegin; 499 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 /**********************************************************************/ 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "PCSPAISetSp" 507 /*@ 508 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 509 510 Input Parameters: 511 + pc - the preconditioner 512 - n - 0 or 1 513 514 Notes: If A has a symmetric nonzero pattern use -sp 1 to 515 improve performance by eliminating some communication 516 in the parallel version. Even if A does not have a 517 symmetric nonzero pattern -sp 1 may well lead to good 518 results, but the code will not follow the published 519 SPAI algorithm exactly. 520 521 522 Level: intermediate 523 524 .seealso: PCSPAI, PCSetType() 525 @*/ 526 PetscErrorCode PCSPAISetSp(PC pc,int sp) 527 { 528 PetscErrorCode ierr; 529 530 PetscFunctionBegin; 531 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 /**********************************************************************/ 536 537 /**********************************************************************/ 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "PCSetFromOptions_SPAI" 541 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 542 { 543 PC_SPAI *ispai = (PC_SPAI*)pc->data; 544 PetscErrorCode ierr; 545 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 546 double epsilon1; 547 PetscBool flg; 548 549 PetscFunctionBegin; 550 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 551 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 552 if (flg) { 553 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 554 } 555 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 556 if (flg) { 557 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 558 } 559 /* added 1/7/99 g.h. */ 560 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 561 if (flg) { 562 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 563 } 564 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 565 if (flg) { 566 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 567 } 568 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 569 if (flg) { 570 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 571 } 572 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 573 if (flg) { 574 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 575 } 576 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 577 if (flg) { 578 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 579 } 580 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 581 if (flg) { 582 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 583 } 584 ierr = PetscOptionsTail();CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 /**********************************************************************/ 589 590 /*MC 591 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 592 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 593 594 Options Database Keys: 595 + -pc_spai_epsilon <eps> - set tolerance 596 . -pc_spai_nbstep <n> - set nbsteps 597 . -pc_spai_max <m> - set max 598 . -pc_spai_max_new <m> - set maxnew 599 . -pc_spai_block_size <n> - set block size 600 . -pc_spai_cache_size <n> - set cache size 601 . -pc_spai_sp <m> - set sp 602 - -pc_spai_set_verbose <true,false> - verbose output 603 604 Notes: This only works with AIJ matrices. 605 606 Level: beginner 607 608 Concepts: approximate inverse 609 610 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 611 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 612 PCSPAISetVerbose(), PCSPAISetSp() 613 M*/ 614 615 EXTERN_C_BEGIN 616 #undef __FUNCT__ 617 #define __FUNCT__ "PCCreate_SPAI" 618 PetscErrorCode PCCreate_SPAI(PC pc) 619 { 620 PC_SPAI *ispai; 621 PetscErrorCode ierr; 622 623 PetscFunctionBegin; 624 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 625 pc->data = ispai; 626 627 pc->ops->destroy = PCDestroy_SPAI; 628 pc->ops->apply = PCApply_SPAI; 629 pc->ops->applyrichardson = 0; 630 pc->ops->setup = PCSetUp_SPAI; 631 pc->ops->view = PCView_SPAI; 632 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 633 634 ispai->epsilon = .4; 635 ispai->nbsteps = 5; 636 ispai->max = 5000; 637 ispai->maxnew = 5; 638 ispai->block_size = 1; 639 ispai->cache_size = 5; 640 ispai->verbose = 0; 641 642 ispai->sp = 1; 643 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 644 645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 646 "PCSPAISetEpsilon_SPAI", 647 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 649 "PCSPAISetNBSteps_SPAI", 650 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 652 "PCSPAISetMax_SPAI", 653 PCSPAISetMax_SPAI);CHKERRQ(ierr); 654 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 655 "PCSPAISetMaxNew_SPAI", 656 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 658 "PCSPAISetBlockSize_SPAI", 659 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 661 "PCSPAISetCacheSize_SPAI", 662 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 664 "PCSPAISetVerbose_SPAI", 665 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 667 "PCSPAISetSp_SPAI", 668 PCSPAISetSp_SPAI);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 EXTERN_C_END 672 673 /**********************************************************************/ 674 675 /* 676 Converts from a PETSc matrix to an SPAI matrix 677 */ 678 #undef __FUNCT__ 679 #define __FUNCT__ "ConvertMatToMatrix" 680 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 681 { 682 matrix *M; 683 int i,j,col; 684 int row_indx; 685 int len,pe,local_indx,start_indx; 686 int *mapping; 687 PetscErrorCode ierr; 688 const int *cols; 689 const double *vals; 690 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 691 PetscMPIInt size,rank; 692 struct compressed_lines *rows; 693 694 PetscFunctionBegin; 695 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 696 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 697 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 698 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 699 700 /* 701 not sure why a barrier is required. commenting out 702 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 703 */ 704 705 M = new_matrix((SPAI_Comm)comm); 706 707 M->n = n; 708 M->bs = 1; 709 M->max_block_size = 1; 710 711 M->mnls = (int*)malloc(sizeof(int)*size); 712 M->start_indices = (int*)malloc(sizeof(int)*size); 713 M->pe = (int*)malloc(sizeof(int)*n); 714 M->block_sizes = (int*)malloc(sizeof(int)*n); 715 for (i=0; i<n; i++) M->block_sizes[i] = 1; 716 717 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 718 719 M->start_indices[0] = 0; 720 for (i=1; i<size; i++) { 721 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 722 } 723 724 M->mnl = M->mnls[M->myid]; 725 M->my_start_index = M->start_indices[M->myid]; 726 727 for (i=0; i<size; i++) { 728 start_indx = M->start_indices[i]; 729 for (j=0; j<M->mnls[i]; j++) 730 M->pe[start_indx+j] = i; 731 } 732 733 if (AT) { 734 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 735 } else { 736 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 737 } 738 739 rows = M->lines; 740 741 /* Determine the mapping from global indices to pointers */ 742 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 743 pe = 0; 744 local_indx = 0; 745 for (i=0; i<M->n; i++) { 746 if (local_indx >= M->mnls[pe]) { 747 pe++; 748 local_indx = 0; 749 } 750 mapping[i] = local_indx + M->start_indices[pe]; 751 local_indx++; 752 } 753 754 755 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 756 757 /*********************************************************/ 758 /************** Set up the row structure *****************/ 759 /*********************************************************/ 760 761 /* count number of nonzeros in every row */ 762 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 763 for (i=rstart; i<rend; i++) { 764 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 765 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 766 } 767 768 /* allocate buffers */ 769 len = 0; 770 for (i=0; i<mnl; i++) { 771 if (len < num_ptr[i]) len = num_ptr[i]; 772 } 773 774 for (i=rstart; i<rend; i++) { 775 row_indx = i-rstart; 776 len = num_ptr[row_indx]; 777 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 778 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 779 } 780 781 /* copy the matrix */ 782 for (i=rstart; i<rend; i++) { 783 row_indx = i - rstart; 784 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 785 for (j=0; j<nz; j++) { 786 col = cols[j]; 787 len = rows->len[row_indx]++; 788 rows->ptrs[row_indx][len] = mapping[col]; 789 rows->A[row_indx][len] = vals[j]; 790 } 791 rows->slen[row_indx] = rows->len[row_indx]; 792 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 793 } 794 795 796 /************************************************************/ 797 /************** Set up the column structure *****************/ 798 /*********************************************************/ 799 800 if (AT) { 801 802 /* count number of nonzeros in every column */ 803 for (i=rstart; i<rend; i++) { 804 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 805 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 806 } 807 808 /* allocate buffers */ 809 len = 0; 810 for (i=0; i<mnl; i++) { 811 if (len < num_ptr[i]) len = num_ptr[i]; 812 } 813 814 for (i=rstart; i<rend; i++) { 815 row_indx = i-rstart; 816 len = num_ptr[row_indx]; 817 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 818 } 819 820 /* copy the matrix (i.e., the structure) */ 821 for (i=rstart; i<rend; i++) { 822 row_indx = i - rstart; 823 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 824 for (j=0; j<nz; j++) { 825 col = cols[j]; 826 len = rows->rlen[row_indx]++; 827 rows->rptrs[row_indx][len] = mapping[col]; 828 } 829 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 830 } 831 } 832 833 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 834 ierr = PetscFree(mapping);CHKERRQ(ierr); 835 836 order_pointers(M); 837 M->maxnz = calc_maxnz(M); 838 *B = M; 839 PetscFunctionReturn(0); 840 } 841 842 /**********************************************************************/ 843 844 /* 845 Converts from an SPAI matrix B to a PETSc matrix PB. 846 This assumes that the the SPAI matrix B is stored in 847 COMPRESSED-ROW format. 848 */ 849 #undef __FUNCT__ 850 #define __FUNCT__ "ConvertMatrixToMat" 851 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 852 { 853 PetscMPIInt size,rank; 854 PetscErrorCode ierr; 855 int m,n,M,N; 856 int d_nz,o_nz; 857 int *d_nnz,*o_nnz; 858 int i,k,global_row,global_col,first_diag_col,last_diag_col; 859 PetscScalar val; 860 861 PetscFunctionBegin; 862 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 863 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 864 865 m = n = B->mnls[rank]; 866 d_nz = o_nz = 0; 867 868 /* Determine preallocation for MatCreateMPIAIJ */ 869 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 870 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 871 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 872 first_diag_col = B->start_indices[rank]; 873 last_diag_col = first_diag_col + B->mnls[rank]; 874 for (i=0; i<B->mnls[rank]; i++) { 875 for (k=0; k<B->lines->len[i]; k++) { 876 global_col = B->lines->ptrs[i][k]; 877 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 878 else o_nnz[i]++; 879 } 880 } 881 882 M = N = B->n; 883 /* Here we only know how to create AIJ format */ 884 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 885 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 886 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 887 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 888 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 889 890 for (i=0; i<B->mnls[rank]; i++) { 891 global_row = B->start_indices[rank]+i; 892 for (k=0; k<B->lines->len[i]; k++) { 893 global_col = B->lines->ptrs[i][k]; 894 val = B->lines->A[i][k]; 895 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 896 } 897 } 898 899 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 900 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 901 902 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 903 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 /**********************************************************************/ 908 909 /* 910 Converts from an SPAI vector v to a PETSc vec Pv. 911 */ 912 #undef __FUNCT__ 913 #define __FUNCT__ "ConvertVectorToVec" 914 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 915 { 916 PetscErrorCode ierr; 917 PetscMPIInt size,rank; 918 int m,M,i,*mnls,*start_indices,*global_indices; 919 920 PetscFunctionBegin; 921 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 922 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 923 924 m = v->mnl; 925 M = v->n; 926 927 928 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 929 930 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 931 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 932 933 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 934 start_indices[0] = 0; 935 for (i=1; i<size; i++) 936 start_indices[i] = start_indices[i-1] +mnls[i-1]; 937 938 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 939 for (i=0; i<v->mnl; i++) 940 global_indices[i] = start_indices[rank] + i; 941 942 ierr = PetscFree(mnls);CHKERRQ(ierr); 943 ierr = PetscFree(start_indices);CHKERRQ(ierr); 944 945 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 946 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 947 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 948 949 ierr = PetscFree(global_indices);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 954 955 956