/*
   3/99 Modified by Stephen Barnard to support SPAI version 3.0
*/

/*
   Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
   Code written by Stephen Barnard.

   Note: there is some BAD memory bleeding below!

   This code needs work

   1) get rid of all memory bleeding
   2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
      rather than having the sp flag for PC_SPAI
   3) fix to set the block size based on the matrix block size

*/

#include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
#include "petscspai.h"

/*
    These are the SPAI include files
*/
EXTERN_C_BEGIN
#define MPI /* required for setting SPAI_Comm correctly in basics.h */
#include <spai.h>
#include <matrix.h>
EXTERN_C_END

extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
extern PetscErrorCode MM_to_PETSC(char *,char *,char *);

/*
   Private context for the SPAI preconditioner.  Holds the SPAI-format
   operands, the resulting approximate inverse in PETSc format, and the
   user-settable SPAI parameters (see the PCSPAISetXXX() functions and
   PCSetFromOptions_SPAI() for how each field is set).
*/
typedef struct {

  matrix *B;              /* matrix in SPAI format */
  matrix *BT;             /* transpose of matrix in SPAI format */
  matrix *M;              /* the approximate inverse in SPAI format */

  Mat    PM;              /* the approximate inverse PETSc format */

  double epsilon;         /* tolerance */
  int    nbsteps;         /* max number of "improvement" steps per line */
  int    max;             /* max dimensions of is_I, q, etc. */
  int    maxnew;          /* max number of new entries per step */
  int    block_size;      /* constant block size */
  int    cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
  int    verbose;         /* SPAI prints timing and statistics */

  int      sp;            /* symmetric nonzero pattern */
  MPI_Comm comm_spai;     /* communicator to be used with spai */
} PC_SPAI;

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSetUp_SPAI"
/*
   PCSetUp_SPAI - Builds the SPAI approximate inverse of pc->pmat and stores
   it as a PETSc Mat in ispai->PM; PCApply_SPAI() then applies it with a
   plain MatMult().
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  Mat            AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix itself supplies both the row
       and the column structure, so no explicit transpose is needed */
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
  } else {
    /* Use the transpose to get the column nonzero structure. */
    ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
    ierr = MatDestroy(&AT);CHKERRQ(ierr);
  }

  /* The PETSc-side transpose AT is destroyed above; only the SPAI-format
     matrices remain, and they are freed with sp_free_matrix() below. */

  /* construct SPAI preconditioner */
  /* FILE *messages */     /* file for warning messages */
  /* double epsilon */     /* tolerance */
  /* int nbsteps */        /* max number of "improvement" steps per line */
  /* int max */            /* max dimensions of is_I, q, etc. */
  /* int maxnew */         /* max number of new entries per step */
  /* int block_size */     /* block_size == 1 specifies scalar elments
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
                           /* cache_size == 0 indicates no caching */
  /* int verbose    */     /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  ierr = bspai(ispai->B,&ispai->M,
               stdout,
               ispai->epsilon,
               ispai->nbsteps,
               ispai->max,
               ispai->maxnew,
               ispai->block_size,
               ispai->cache_size,
               ispai->verbose);CHKERRQ(ierr);

  /* convert the SPAI approximate inverse into a PETSc Mat on the PC's own
     communicator (ispai->comm_spai is a duplicate used only inside SPAI) */
  ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);

  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCApply_SPAI"
/*
   PCApply_SPAI - Applies the preconditioner: y = PM * xx, where PM is the
   approximate inverse computed in PCSetUp_SPAI().
*/
static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_SPAI"
/*
   PCDestroy_SPAI - Destroys the PETSc-format approximate inverse, frees the
   duplicated SPAI communicator, and releases the PC_SPAI context itself.
*/
static PetscErrorCode PCDestroy_SPAI(PC pc)
{
  PetscErrorCode ierr;
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCView_SPAI"
/* PCView_SPAI - Prints the current SPAI parameters to an ASCII viewer. */
static PetscErrorCode
PCView_SPAI(PC pc,PetscViewer viewer)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  /* only ASCII viewers are supported; anything else is silently a no-op */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," max %d\n",       ispai->max);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," sp %d\n",        ispai->sp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   The PCSPAISetXXX_SPAI() functions below are the type-specific
   implementations behind the public PCSPAISetXXX() wrappers; they are
   registered by name in PCCreate_SPAI() and invoked via PetscTryMethod().
   Each one simply records the parameter in the PC_SPAI context; the value
   takes effect at the next PCSetUp_SPAI().
*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon_SPAI"
/* record the SPAI tolerance epsilon in the context */
PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->epsilon = epsilon1;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/**********************************************************************/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps_SPAI"
/* record the maximum number of improvement steps per row */
PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->nbsteps = nbsteps1;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/**********************************************************************/

/* added 1/7/99 g.h. */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax_SPAI"
/* record the size of SPAI's internal working buffers (is_I, q, etc.) */
PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->max = max1;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/**********************************************************************/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew_SPAI"
/* record the maximum number of new nonzero candidates per step */
PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->maxnew = maxnew1;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/**********************************************************************/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize_SPAI"
/* record the block size (1 = scalar, n = nxn blocks, 0 = variable blocks) */
PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->block_size = block_size1;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/**********************************************************************/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize_SPAI"
/* record the SPAI message-cache size (0 disables caching) */
PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->cache_size = cache_size;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/**********************************************************************/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose_SPAI"
/* record the verbosity flag (nonzero: SPAI prints timing/statistics) */
PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->verbose = verbose;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/**********************************************************************/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp_SPAI"
/* record whether the nonzero pattern should be treated as symmetric */
PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;
  PetscFunctionBegin;
  ispai->sp = sp;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- eps - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
  @*/
PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after ns steps, SPAI simply
                 uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - block size (default 1)

  Notes: A block size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases the a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - level (default 1)

  Notes: print parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp"
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - 0 or 1

  Notes: If A has a symmetric nonzero pattern use -sp 1 to
                 improve performance by eliminating some communication
                 in the parallel version. Even if A does not have a
                 symmetric nonzero pattern -sp 1 may well lead to good
                 results, but the code will not follow the published
                 SPAI algorithm exactly.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/**********************************************************************/

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_SPAI"
/*
   PCSetFromOptions_SPAI - Reads the -pc_spai_XXX options and forwards each
   one that was supplied to the corresponding PCSPAISetXXX() wrapper.
*/
static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
  double         epsilon1;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
  ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
  }
  /* added 1/7/99 g.h.
*/ 545 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 546 if (flg) { 547 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 548 } 549 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 550 if (flg) { 551 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 552 } 553 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 554 if (flg) { 555 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 556 } 557 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 558 if (flg) { 559 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 560 } 561 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 562 if (flg) { 563 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 564 } 565 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 566 if (flg) { 567 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 568 } 569 ierr = PetscOptionsTail();CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 /**********************************************************************/ 574 575 /*MC 576 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 577 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 578 579 Options Database Keys: 580 + -pc_spai_epsilon <eps> - set tolerance 581 . -pc_spai_nbstep <n> - set nbsteps 582 . -pc_spai_max <m> - set max 583 . -pc_spai_max_new <m> - set maxnew 584 . -pc_spai_block_size <n> - set block size 585 . -pc_spai_cache_size <n> - set cache size 586 . -pc_spai_sp <m> - set sp 587 - -pc_spai_set_verbose <true,false> - verbose output 588 589 Notes: This only works with AIJ matrices. 
590 591 Level: beginner 592 593 Concepts: approximate inverse 594 595 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 596 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 597 PCSPAISetVerbose(), PCSPAISetSp() 598 M*/ 599 600 EXTERN_C_BEGIN 601 #undef __FUNCT__ 602 #define __FUNCT__ "PCCreate_SPAI" 603 PetscErrorCode PCCreate_SPAI(PC pc) 604 { 605 PC_SPAI *ispai; 606 PetscErrorCode ierr; 607 608 PetscFunctionBegin; 609 ierr = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr); 610 pc->data = ispai; 611 612 pc->ops->destroy = PCDestroy_SPAI; 613 pc->ops->apply = PCApply_SPAI; 614 pc->ops->applyrichardson = 0; 615 pc->ops->setup = PCSetUp_SPAI; 616 pc->ops->view = PCView_SPAI; 617 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 618 619 ispai->epsilon = .4; 620 ispai->nbsteps = 5; 621 ispai->max = 5000; 622 ispai->maxnew = 5; 623 ispai->block_size = 1; 624 ispai->cache_size = 5; 625 ispai->verbose = 0; 626 627 ispai->sp = 1; 628 ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr); 629 630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C", 631 "PCSPAISetEpsilon_SPAI", 632 PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C", 634 "PCSPAISetNBSteps_SPAI", 635 PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C", 637 "PCSPAISetMax_SPAI", 638 PCSPAISetMax_SPAI);CHKERRQ(ierr); 639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC", 640 "PCSPAISetMaxNew_SPAI", 641 PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 642 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C", 643 "PCSPAISetBlockSize_SPAI", 644 PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C", 646 "PCSPAISetCacheSize_SPAI", 647 PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 
648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C", 649 "PCSPAISetVerbose_SPAI", 650 PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C", 652 "PCSPAISetSp_SPAI", 653 PCSPAISetSp_SPAI);CHKERRQ(ierr); 654 655 PetscFunctionReturn(0); 656 } 657 EXTERN_C_END 658 659 /**********************************************************************/ 660 661 /* 662 Converts from a PETSc matrix to an SPAI matrix 663 */ 664 #undef __FUNCT__ 665 #define __FUNCT__ "ConvertMatToMatrix" 666 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 667 { 668 matrix *M; 669 int i,j,col; 670 int row_indx; 671 int len,pe,local_indx,start_indx; 672 int *mapping; 673 PetscErrorCode ierr; 674 const int *cols; 675 const double *vals; 676 int *num_ptr,n,mnl,nnl,nz,rstart,rend; 677 PetscMPIInt size,rank; 678 struct compressed_lines *rows; 679 680 PetscFunctionBegin; 681 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 682 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 683 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 684 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 685 686 /* 687 not sure why a barrier is required. 
commenting out 688 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 689 */ 690 691 M = new_matrix((SPAI_Comm)comm); 692 693 M->n = n; 694 M->bs = 1; 695 M->max_block_size = 1; 696 697 M->mnls = (int*)malloc(sizeof(int)*size); 698 M->start_indices = (int*)malloc(sizeof(int)*size); 699 M->pe = (int*)malloc(sizeof(int)*n); 700 M->block_sizes = (int*)malloc(sizeof(int)*n); 701 for (i=0; i<n; i++) M->block_sizes[i] = 1; 702 703 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 704 705 M->start_indices[0] = 0; 706 for (i=1; i<size; i++) { 707 M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 708 } 709 710 M->mnl = M->mnls[M->myid]; 711 M->my_start_index = M->start_indices[M->myid]; 712 713 for (i=0; i<size; i++) { 714 start_indx = M->start_indices[i]; 715 for (j=0; j<M->mnls[i]; j++) 716 M->pe[start_indx+j] = i; 717 } 718 719 if (AT) { 720 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 721 } else { 722 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 723 } 724 725 rows = M->lines; 726 727 /* Determine the mapping from global indices to pointers */ 728 ierr = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr); 729 pe = 0; 730 local_indx = 0; 731 for (i=0; i<M->n; i++) { 732 if (local_indx >= M->mnls[pe]) { 733 pe++; 734 local_indx = 0; 735 } 736 mapping[i] = local_indx + M->start_indices[pe]; 737 local_indx++; 738 } 739 740 741 ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr); 742 743 /*********************************************************/ 744 /************** Set up the row structure *****************/ 745 /*********************************************************/ 746 747 /* count number of nonzeros in every row */ 748 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 749 for (i=rstart; i<rend; i++) { 750 ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 751 ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 752 } 753 754 /* 
allocate buffers */ 755 len = 0; 756 for (i=0; i<mnl; i++) { 757 if (len < num_ptr[i]) len = num_ptr[i]; 758 } 759 760 for (i=rstart; i<rend; i++) { 761 row_indx = i-rstart; 762 len = num_ptr[row_indx]; 763 rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int)); 764 rows->A[row_indx] = (double*)malloc(len*sizeof(double)); 765 } 766 767 /* copy the matrix */ 768 for (i=rstart; i<rend; i++) { 769 row_indx = i - rstart; 770 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 771 for (j=0; j<nz; j++) { 772 col = cols[j]; 773 len = rows->len[row_indx]++; 774 rows->ptrs[row_indx][len] = mapping[col]; 775 rows->A[row_indx][len] = vals[j]; 776 } 777 rows->slen[row_indx] = rows->len[row_indx]; 778 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 779 } 780 781 782 /************************************************************/ 783 /************** Set up the column structure *****************/ 784 /*********************************************************/ 785 786 if (AT) { 787 788 /* count number of nonzeros in every column */ 789 for (i=rstart; i<rend; i++) { 790 ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 791 ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 792 } 793 794 /* allocate buffers */ 795 len = 0; 796 for (i=0; i<mnl; i++) { 797 if (len < num_ptr[i]) len = num_ptr[i]; 798 } 799 800 for (i=rstart; i<rend; i++) { 801 row_indx = i-rstart; 802 len = num_ptr[row_indx]; 803 rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int)); 804 } 805 806 /* copy the matrix (i.e., the structure) */ 807 for (i=rstart; i<rend; i++) { 808 row_indx = i - rstart; 809 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 810 for (j=0; j<nz; j++) { 811 col = cols[j]; 812 len = rows->rlen[row_indx]++; 813 rows->rptrs[row_indx][len] = mapping[col]; 814 } 815 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 816 } 817 } 818 819 ierr = PetscFree(num_ptr);CHKERRQ(ierr); 820 ierr = PetscFree(mapping);CHKERRQ(ierr); 
821 822 order_pointers(M); 823 M->maxnz = calc_maxnz(M); 824 825 *B = M; 826 827 PetscFunctionReturn(0); 828 } 829 830 /**********************************************************************/ 831 832 /* 833 Converts from an SPAI matrix B to a PETSc matrix PB. 834 This assumes that the the SPAI matrix B is stored in 835 COMPRESSED-ROW format. 836 */ 837 #undef __FUNCT__ 838 #define __FUNCT__ "ConvertMatrixToMat" 839 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 840 { 841 PetscMPIInt size,rank; 842 PetscErrorCode ierr; 843 int m,n,M,N; 844 int d_nz,o_nz; 845 int *d_nnz,*o_nnz; 846 int i,k,global_row,global_col,first_diag_col,last_diag_col; 847 PetscScalar val; 848 849 PetscFunctionBegin; 850 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 851 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 852 853 m = n = B->mnls[rank]; 854 d_nz = o_nz = 0; 855 856 /* Determine preallocation for MatCreateMPIAIJ */ 857 ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 858 ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr); 859 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 860 first_diag_col = B->start_indices[rank]; 861 last_diag_col = first_diag_col + B->mnls[rank]; 862 for (i=0; i<B->mnls[rank]; i++) { 863 for (k=0; k<B->lines->len[i]; k++) { 864 global_col = B->lines->ptrs[i][k]; 865 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 866 else o_nnz[i]++; 867 } 868 } 869 870 M = N = B->n; 871 /* Here we only know how to create AIJ format */ 872 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 873 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 874 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 875 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 876 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 877 878 for (i=0; i<B->mnls[rank]; i++) { 879 global_row = B->start_indices[rank]+i; 880 for (k=0; k<B->lines->len[i]; k++) { 881 global_col = B->lines->ptrs[i][k]; 882 val = 
B->lines->A[i][k]; 883 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 884 } 885 } 886 887 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 888 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 889 890 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 891 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 892 893 PetscFunctionReturn(0); 894 } 895 896 /**********************************************************************/ 897 898 /* 899 Converts from an SPAI vector v to a PETSc vec Pv. 900 */ 901 #undef __FUNCT__ 902 #define __FUNCT__ "ConvertVectorToVec" 903 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 904 { 905 PetscErrorCode ierr; 906 PetscMPIInt size,rank; 907 int m,M,i,*mnls,*start_indices,*global_indices; 908 909 PetscFunctionBegin; 910 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 911 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 912 913 m = v->mnl; 914 M = v->n; 915 916 917 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 918 919 ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr); 920 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 921 922 ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr); 923 start_indices[0] = 0; 924 for (i=1; i<size; i++) 925 start_indices[i] = start_indices[i-1] +mnls[i-1]; 926 927 ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr); 928 for (i=0; i<v->mnl; i++) 929 global_indices[i] = start_indices[rank] + i; 930 931 ierr = PetscFree(mnls);CHKERRQ(ierr); 932 ierr = PetscFree(start_indices);CHKERRQ(ierr); 933 934 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 935 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 936 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 937 938 ierr = PetscFree(global_indices);CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 943 944 945