1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 21 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 22 #include "petscspai.h" 23 24 /* 25 These are the SPAI include files 26 */ 27 EXTERN_C_BEGIN 28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */ 29 #include <spai.h> 30 #include <matrix.h> 31 EXTERN_C_END 32 33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 37 38 typedef struct { 39 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. */ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 /**********************************************************************/ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCSetUp_SPAI" 62 static PetscErrorCode PCSetUp_SPAI(PC pc) 63 { 64 PC_SPAI *ispai = (PC_SPAI*)pc->data; 65 PetscErrorCode ierr; 66 Mat AT; 67 68 PetscFunctionBegin; 69 init_SPAI(); 70 71 if (ispai->sp) { 72 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 73 } else { 74 /* Use the transpose to get the column nonzero structure. */ 75 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 76 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 77 ierr = MatDestroy(&AT);CHKERRQ(ierr); 78 } 79 80 /* Destroy the transpose */ 81 /* Don't know how to do it. PETSc developers? */ 82 83 /* construct SPAI preconditioner */ 84 /* FILE *messages */ /* file for warning messages */ 85 /* double epsilon */ /* tolerance */ 86 /* int nbsteps */ /* max number of "improvement" steps per line */ 87 /* int max */ /* max dimensions of is_I, q, etc. */ 88 /* int maxnew */ /* max number of new entries per step */ 89 /* int block_size */ /* block_size == 1 specifies scalar elments 90 block_size == n specifies nxn constant-block elements 91 block_size == 0 specifies variable-block elements */ 92 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 93 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 94 verbose == 1 prints timing and matrix statistics */ 95 96 ierr = bspai(ispai->B,&ispai->M, 97 stdout, 98 ispai->epsilon, 99 ispai->nbsteps, 100 ispai->max, 101 ispai->maxnew, 102 ispai->block_size, 103 ispai->cache_size, 104 ispai->verbose);CHKERRQ(ierr); 105 106 ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr); 107 108 /* free the SPAI matrices */ 109 sp_free_matrix(ispai->B); 110 sp_free_matrix(ispai->M); 111 PetscFunctionReturn(0); 112 } 113 114 /**********************************************************************/ 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCApply_SPAI" 118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 119 { 120 PC_SPAI *ispai = (PC_SPAI*)pc->data; 121 PetscErrorCode ierr; 122 123 PetscFunctionBegin; 124 /* Now using PETSc's multiply */ 125 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 /**********************************************************************/ 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "PCDestroy_SPAI" 133 static PetscErrorCode PCDestroy_SPAI(PC pc) 134 { 135 PetscErrorCode ierr; 136 PC_SPAI *ispai = (PC_SPAI*)pc->data; 137 138 PetscFunctionBegin; 139 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 140 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 141 ierr = PetscFree(pc->data);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 /**********************************************************************/ 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "PCView_SPAI" 149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 150 { 151 PC_SPAI *ispai = (PC_SPAI*)pc->data; 152 PetscErrorCode ierr; 153 PetscBool iascii; 154 155 PetscFunctionBegin; 156 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 157 if (iascii) { 158 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 159 ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);CHKERRQ(ierr); 160 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 161 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 162 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 EXTERN_C_BEGIN 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCSPAISetEpsilon_SPAI" 174 PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 175 { 176 PC_SPAI *ispai = (PC_SPAI*)pc->data; 177 178 PetscFunctionBegin; 179 ispai->epsilon = epsilon1; 180 PetscFunctionReturn(0); 181 } 182 EXTERN_C_END 183 184 /**********************************************************************/ 185 186 EXTERN_C_BEGIN 187 #undef __FUNCT__ 188 #define __FUNCT__ "PCSPAISetNBSteps_SPAI" 189 PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 190 { 191 PC_SPAI *ispai = (PC_SPAI*)pc->data; 192 193 PetscFunctionBegin; 194 ispai->nbsteps = nbsteps1; 195 PetscFunctionReturn(0); 196 } 197 EXTERN_C_END 198 199 /**********************************************************************/ 200 201 /* added 1/7/99 g.h. */ 202 EXTERN_C_BEGIN 203 #undef __FUNCT__ 204 #define __FUNCT__ "PCSPAISetMax_SPAI" 205 PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 206 { 207 PC_SPAI *ispai = (PC_SPAI*)pc->data; 208 209 PetscFunctionBegin; 210 ispai->max = max1; 211 PetscFunctionReturn(0); 212 } 213 EXTERN_C_END 214 215 /**********************************************************************/ 216 217 EXTERN_C_BEGIN 218 #undef __FUNCT__ 219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI" 220 PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 221 { 222 PC_SPAI *ispai = (PC_SPAI*)pc->data; 223 224 PetscFunctionBegin; 225 ispai->maxnew = maxnew1; 226 PetscFunctionReturn(0); 227 } 228 EXTERN_C_END 229 230 /**********************************************************************/ 231 232 EXTERN_C_BEGIN 233 #undef __FUNCT__ 234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI" 235 PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 236 { 237 PC_SPAI *ispai = (PC_SPAI*)pc->data; 238 239 PetscFunctionBegin; 240 ispai->block_size = block_size1; 241 PetscFunctionReturn(0); 242 } 243 EXTERN_C_END 244 245 /**********************************************************************/ 246 247 EXTERN_C_BEGIN 248 #undef __FUNCT__ 249 #define __FUNCT__ "PCSPAISetCacheSize_SPAI" 250 PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 251 { 252 PC_SPAI *ispai = (PC_SPAI*)pc->data; 253 254 PetscFunctionBegin; 255 ispai->cache_size = cache_size; 256 PetscFunctionReturn(0); 257 } 258 EXTERN_C_END 259 260 /**********************************************************************/ 261 262 EXTERN_C_BEGIN 263 #undef __FUNCT__ 264 #define __FUNCT__ "PCSPAISetVerbose_SPAI" 265 PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 266 { 267 PC_SPAI *ispai = (PC_SPAI*)pc->data; 268 269 PetscFunctionBegin; 270 ispai->verbose = verbose; 271 PetscFunctionReturn(0); 272 } 273 EXTERN_C_END 274 275 /**********************************************************************/ 276 277 EXTERN_C_BEGIN 278 #undef __FUNCT__ 279 #define __FUNCT__ "PCSPAISetSp_SPAI" 280 PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 281 { 282 PC_SPAI *ispai = (PC_SPAI*)pc->data; 283 284 PetscFunctionBegin; 285 ispai->sp = sp; 286 PetscFunctionReturn(0); 287 } 288 EXTERN_C_END 289 290 /* -------------------------------------------------------------------*/ 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "PCSPAISetEpsilon" 294 /*@ 295 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 296 297 Input Parameters: 298 + pc - the preconditioner 299 - eps - epsilon (default .4) 300 301 Notes: Espilon must be between 0 and 1. It controls the 302 quality of the approximation of M to the inverse of 303 A. Higher values of epsilon lead to more work, more 304 fill, and usually better preconditioners. In many 305 cases the best choice of epsilon is the one that 306 divides the total solution time equally between the 307 preconditioner and the solver. 308 309 Level: intermediate 310 311 .seealso: PCSPAI, PCSetType() 312 @*/ 313 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 314 { 315 PetscErrorCode ierr; 316 317 PetscFunctionBegin; 318 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 /**********************************************************************/ 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCSPAISetNBSteps" 326 /*@ 327 PCSPAISetNBSteps - set maximum number of improvement steps per row in 328 the SPAI preconditioner 329 330 Input Parameters: 331 + pc - the preconditioner 332 - n - number of steps (default 5) 333 334 Notes: SPAI constructs to approximation to every column of 335 the exact inverse of A in a series of improvement 336 steps. The quality of the approximation is determined 337 by epsilon. If an approximation achieving an accuracy 338 of epsilon is not obtained after ns steps, SPAI simply 339 uses the best approximation constructed so far. 340 341 Level: intermediate 342 343 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 344 @*/ 345 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 346 { 347 PetscErrorCode ierr; 348 349 PetscFunctionBegin; 350 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 354 /**********************************************************************/ 355 356 /* added 1/7/99 g.h. */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCSPAISetMax" 359 /*@ 360 PCSPAISetMax - set the size of various working buffers in 361 the SPAI preconditioner 362 363 Input Parameters: 364 + pc - the preconditioner 365 - n - size (default is 5000) 366 367 Level: intermediate 368 369 .seealso: PCSPAI, PCSetType() 370 @*/ 371 PetscErrorCode PCSPAISetMax(PC pc,int max1) 372 { 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 /**********************************************************************/ 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "PCSPAISetMaxNew" 384 /*@ 385 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 386 in SPAI preconditioner 387 388 Input Parameters: 389 + pc - the preconditioner 390 - n - maximum number (default 5) 391 392 Level: intermediate 393 394 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 395 @*/ 396 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 397 { 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 /**********************************************************************/ 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "PCSPAISetBlockSize" 409 /*@ 410 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 411 412 Input Parameters: 413 + pc - the preconditioner 414 - n - block size (default 1) 415 416 Notes: A block 417 size of 1 treats A as a matrix of scalar elements. A 418 block size of s > 1 treats A as a matrix of sxs 419 blocks. A block size of 0 treats A as a matrix with 420 variable sized blocks, which are determined by 421 searching for dense square diagonal blocks in A. 422 This can be very effective for finite-element 423 matrices. 424 425 SPAI will convert A to block form, use a block 426 version of the preconditioner algorithm, and then 427 convert the result back to scalar form. 428 429 In many cases the a block-size parameter other than 1 430 can lead to very significant improvement in 431 performance. 432 433 434 Level: intermediate 435 436 .seealso: PCSPAI, PCSetType() 437 @*/ 438 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 439 { 440 PetscErrorCode ierr; 441 442 PetscFunctionBegin; 443 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 /**********************************************************************/ 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "PCSPAISetCacheSize" 451 /*@ 452 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 453 454 Input Parameters: 455 + pc - the preconditioner 456 - n - cache size {0,1,2,3,4,5} (default 5) 457 458 Notes: SPAI uses a hash table to cache messages and avoid 459 redundant communication. If suggest always using 460 5. This parameter is irrelevant in the serial 461 version. 462 463 Level: intermediate 464 465 .seealso: PCSPAI, PCSetType() 466 @*/ 467 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 468 { 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 /**********************************************************************/ 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCSPAISetVerbose" 480 /*@ 481 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 482 483 Input Parameters: 484 + pc - the preconditioner 485 - n - level (default 1) 486 487 Notes: print parameters, timings and matrix statistics 488 489 Level: intermediate 490 491 .seealso: PCSPAI, PCSetType() 492 @*/ 493 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 494 { 495 PetscErrorCode ierr; 496 497 PetscFunctionBegin; 498 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 /**********************************************************************/ 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCSPAISetSp" 506 /*@ 507 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 508 509 Input Parameters: 510 + pc - the preconditioner 511 - n - 0 or 1 512 513 Notes: If A has a symmetric nonzero pattern use -sp 1 to 514 improve performance by eliminating some communication 515 in the parallel version. Even if A does not have a 516 symmetric nonzero pattern -sp 1 may well lead to good 517 results, but the code will not follow the published 518 SPAI algorithm exactly. 519 520 521 Level: intermediate 522 523 .seealso: PCSPAI, PCSetType() 524 @*/ 525 PetscErrorCode PCSPAISetSp(PC pc,int sp) 526 { 527 PetscErrorCode ierr; 528 529 PetscFunctionBegin; 530 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 /**********************************************************************/ 535 536 /**********************************************************************/ 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "PCSetFromOptions_SPAI" 540 static PetscErrorCode PCSetFromOptions_SPAI(PC pc) 541 { 542 PC_SPAI *ispai = (PC_SPAI*)pc->data; 543 PetscErrorCode ierr; 544 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 545 double epsilon1; 546 PetscBool flg; 547 548 PetscFunctionBegin; 549 ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr); 550 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 551 if (flg) { 552 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 553 } 554 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 555 if (flg) { 556 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 557 } 558 /* added 1/7/99 g.h. */ 559 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 560 if (flg) { 561 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 562 } 563 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 564 if (flg) { 565 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 566 } 567 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 568 if (flg) { 569 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 570 } 571 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 572 if (flg) { 573 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 574 } 575 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 576 if (flg) { 577 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 578 } 579 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 580 if (flg) { 581 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 582 } 583 ierr = PetscOptionsTail();CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 /**********************************************************************/ 588 589 /*MC 590 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 591 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 592 593 Options Database Keys: 594 + -pc_spai_epsilon <eps> - set tolerance 595 . -pc_spai_nbstep <n> - set nbsteps 596 . -pc_spai_max <m> - set max 597 . -pc_spai_max_new <m> - set maxnew 598 . -pc_spai_block_size <n> - set block size 599 . -pc_spai_cache_size <n> - set cache size 600 . -pc_spai_sp <m> - set sp 601 - -pc_spai_set_verbose <true,false> - verbose output 602 603 Notes: This only works with AIJ matrices. 604 605 Level: beginner 606 607 Concepts: approximate inverse 608 609 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 610 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 611 PCSPAISetVerbose(), PCSPAISetSp() 612 M*/ 613 614 EXTERN_C_BEGIN 615 #undef __FUNCT__ 616 #define __FUNCT__ "PCCreate_SPAI" 617 PetscErrorCode PCCreate_SPAI(PC pc) 618 { 619 PC_SPAI *ispai; 620 PetscErrorCode ierr; 621 622 PetscFunctionBegin; 623 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 624 pc->data = ispai; 625 626 pc->ops->destroy = PCDestroy_SPAI; 627 pc->ops->apply = PCApply_SPAI; 628 pc->ops->applyrichardson = 0; 629 pc->ops->setup = PCSetUp_SPAI; 630 pc->ops->view = PCView_SPAI; 631 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 632 633 ispai->epsilon = .4; 634 ispai->nbsteps = 5; 635 ispai->max = 5000; 636 ispai->maxnew = 5; 637 ispai->block_size = 1; 638 ispai->cache_size = 5; 639 ispai->verbose = 0; 640 641 ispai->sp = 1; 642 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 643 644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 645 "PCSPAISetEpsilon_SPAI", 646 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 647 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 648 "PCSPAISetNBSteps_SPAI", 649 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 650 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 651 "PCSPAISetMax_SPAI", 652 PCSPAISetMax_SPAI);CHKERRQ(ierr); 653 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 654 "PCSPAISetMaxNew_SPAI", 655 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 657 "PCSPAISetBlockSize_SPAI", 658 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 660 "PCSPAISetCacheSize_SPAI", 661 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 663 "PCSPAISetVerbose_SPAI", 664 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 666 "PCSPAISetSp_SPAI", 667 PCSPAISetSp_SPAI);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 EXTERN_C_END 671 672 /**********************************************************************/ 673 674 /* 675 Converts from a PETSc matrix to an SPAI matrix 676 */ 677 #undef __FUNCT__ 678 #define __FUNCT__ "ConvertMatToMatrix" 679 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 680 { 681 matrix *M; 682 int i,j,col; 683 int row_indx; 684 int len,pe,local_indx,start_indx; 685 int *mapping; 686 PetscErrorCode ierr; 687 const int *cols; 688 const double *vals; 689 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 690 PetscMPIInt size,rank; 691 struct compressed_lines *rows; 692 693 PetscFunctionBegin; 694 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 695 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 696 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 697 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 698 699 /* 700 not sure why a barrier is required. commenting out 701 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 702 */ 703 704 M = new_matrix((SPAI_Comm)comm); 705 706 M->n = n; 707 M->bs = 1; 708 M->max_block_size = 1; 709 710 M->mnls = (int*)malloc(sizeof(int)*size); 711 M->start_indices = (int*)malloc(sizeof(int)*size); 712 M->pe = (int*)malloc(sizeof(int)*n); 713 M->block_sizes = (int*)malloc(sizeof(int)*n); 714 for (i=0; i<n; i++) M->block_sizes[i] = 1; 715 716 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 717 718 M->start_indices[0] = 0; 719 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 720 721 M->mnl = M->mnls[M->myid]; 722 M->my_start_index = M->start_indices[M->myid]; 723 724 for (i=0; i<size; i++) { 725 start_indx = M->start_indices[i]; 726 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 727 } 728 729 if (AT) { 730 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 731 } else { 732 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 733 } 734 735 rows = M->lines; 736 737 /* Determine the mapping from global indices to pointers */ 738 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 739 pe = 0; 740 local_indx = 0; 741 for (i=0; i<M->n; i++) { 742 if (local_indx >= M->mnls[pe]) { 743 pe++; 744 local_indx = 0; 745 } 746 mapping[i] = local_indx + M->start_indices[pe]; 747 local_indx++; 748 } 749 750 751 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 752 753 /*********************************************************/ 754 /************** Set up the row structure *****************/ 755 /*********************************************************/ 756 757 /* count number of nonzeros in every row */ 758 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 759 for (i=rstart; i<rend; i++) { 760 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 761 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 762 } 763 764 /* allocate buffers */ 765 len = 0; 766 for (i=0; i<mnl; i++) { 767 if (len < num_ptr[i]) len = num_ptr[i]; 768 } 769 770 for (i=rstart; i<rend; i++) { 771 row_indx = i-rstart; 772 len = num_ptr[row_indx]; 773 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 774 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 775 } 776 777 /* copy the matrix */ 778 for (i=rstart; i<rend; i++) { 779 row_indx = i - rstart; 780 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 781 for (j=0; j<nz; j++) { 782 col = cols[j]; 783 len = rows->len[row_indx]++; 784 785 rows->ptrs[row_indx][len] = mapping[col]; 786 rows->A[row_indx][len] = vals[j]; 787 } 788 rows->slen[row_indx] = rows->len[row_indx]; 789 790 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 791 } 792 793 794 /************************************************************/ 795 /************** Set up the column structure *****************/ 796 /*********************************************************/ 797 798 if (AT) { 799 800 /* count number of nonzeros in every column */ 801 for (i=rstart; i<rend; i++) { 802 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 803 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 804 } 805 806 /* allocate buffers */ 807 len = 0; 808 for (i=0; i<mnl; i++) { 809 if (len < num_ptr[i]) len = num_ptr[i]; 810 } 811 812 for (i=rstart; i<rend; i++) { 813 row_indx = i-rstart; 814 len = num_ptr[row_indx]; 815 816 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 817 } 818 819 /* copy the matrix (i.e., the structure) */ 820 for (i=rstart; i<rend; i++) { 821 row_indx = i - rstart; 822 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 823 for (j=0; j<nz; j++) { 824 col = cols[j]; 825 len = rows->rlen[row_indx]++; 826 827 rows->rptrs[row_indx][len] = mapping[col]; 828 } 829 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 830 } 831 } 832 833 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 834 ierr = PetscFree(mapping);CHKERRQ(ierr); 835 836 order_pointers(M); 837 M->maxnz = calc_maxnz(M); 838 *B = M; 839 PetscFunctionReturn(0); 840 } 841 842 /**********************************************************************/ 843 844 /* 845 Converts from an SPAI matrix B to a PETSc matrix PB. 846 This assumes that the the SPAI matrix B is stored in 847 COMPRESSED-ROW format. 848 */ 849 #undef __FUNCT__ 850 #define __FUNCT__ "ConvertMatrixToMat" 851 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 852 { 853 PetscMPIInt size,rank; 854 PetscErrorCode ierr; 855 int m,n,M,N; 856 int d_nz,o_nz; 857 int *d_nnz,*o_nnz; 858 int i,k,global_row,global_col,first_diag_col,last_diag_col; 859 PetscScalar val; 860 861 PetscFunctionBegin; 862 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 863 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 864 865 m = n = B->mnls[rank]; 866 d_nz = o_nz = 0; 867 868 /* Determine preallocation for MatCreateMPIAIJ */ 869 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 870 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 871 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 872 first_diag_col = B->start_indices[rank]; 873 last_diag_col = first_diag_col + B->mnls[rank]; 874 for (i=0; i<B->mnls[rank]; i++) { 875 for (k=0; k<B->lines->len[i]; k++) { 876 global_col = B->lines->ptrs[i][k]; 877 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 878 else o_nnz[i]++; 879 } 880 } 881 882 M = N = B->n; 883 /* Here we only know how to create AIJ format */ 884 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 885 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 886 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 887 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 888 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 889 890 for (i=0; i<B->mnls[rank]; i++) { 891 global_row = B->start_indices[rank]+i; 892 for (k=0; k<B->lines->len[i]; k++) { 893 global_col = B->lines->ptrs[i][k]; 894 895 val = B->lines->A[i][k]; 896 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 897 } 898 } 899 900 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 901 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 902 903 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 904 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 /**********************************************************************/ 909 910 /* 911 Converts from an SPAI vector v to a PETSc vec Pv. 912 */ 913 #undef __FUNCT__ 914 #define __FUNCT__ "ConvertVectorToVec" 915 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 916 { 917 PetscErrorCode ierr; 918 PetscMPIInt size,rank; 919 int m,M,i,*mnls,*start_indices,*global_indices; 920 921 PetscFunctionBegin; 922 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 923 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 924 925 m = v->mnl; 926 M = v->n; 927 928 929 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 930 931 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 932 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 933 934 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 935 936 start_indices[0] = 0; 937 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 938 939 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 940 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 941 942 ierr = PetscFree(mnls);CHKERRQ(ierr); 943 ierr = PetscFree(start_indices);CHKERRQ(ierr); 944 945 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 946 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 947 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 948 949 ierr = PetscFree(global_indices);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 954 955 956