1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 23 #include <../src/ksp/pc/impls/spai/petscspai.h> 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include <spai.h> 31 #include <matrix.h> 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 38 39 typedef struct { 40 41 matrix *B; /* matrix in SPAI format */ 42 matrix *BT; /* transpose of matrix in SPAI format */ 43 matrix *M; /* the approximate inverse in SPAI format */ 44 45 Mat PM; /* the approximate inverse PETSc format */ 46 47 double epsilon; /* tolerance */ 48 int nbsteps; /* max number of "improvement" steps per line */ 49 int max; /* max dimensions of is_I, q, etc. */ 50 int maxnew; /* max number of new entries per step */ 51 int block_size; /* constant block size */ 52 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 53 int verbose; /* SPAI prints timing and statistics */ 54 55 int sp; /* symmetric nonzero pattern */ 56 MPI_Comm comm_spai; /* communicator to be used with spai */ 57 } PC_SPAI; 58 59 /**********************************************************************/ 60 61 static PetscErrorCode PCSetUp_SPAI(PC pc) 62 { 63 PC_SPAI *ispai = (PC_SPAI*)pc->data; 64 PetscErrorCode ierr; 65 Mat AT; 66 67 PetscFunctionBegin; 68 init_SPAI(); 69 70 if (ispai->sp) { 71 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 72 } else { 73 /* Use the transpose to get the column nonzero structure. */ 74 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 75 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 76 ierr = MatDestroy(&AT);CHKERRQ(ierr); 77 } 78 79 /* Destroy the transpose */ 80 /* Don't know how to do it. PETSc developers? */ 81 82 /* construct SPAI preconditioner */ 83 /* FILE *messages */ /* file for warning messages */ 84 /* double epsilon */ /* tolerance */ 85 /* int nbsteps */ /* max number of "improvement" steps per line */ 86 /* int max */ /* max dimensions of is_I, q, etc. */ 87 /* int maxnew */ /* max number of new entries per step */ 88 /* int block_size */ /* block_size == 1 specifies scalar elments 89 block_size == n specifies nxn constant-block elements 90 block_size == 0 specifies variable-block elements */ 91 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 92 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 93 verbose == 1 prints timing and matrix statistics */ 94 95 ierr = bspai(ispai->B,&ispai->M, 96 stdout, 97 ispai->epsilon, 98 ispai->nbsteps, 99 ispai->max, 100 ispai->maxnew, 101 ispai->block_size, 102 ispai->cache_size, 103 ispai->verbose);CHKERRQ(ierr); 104 105 ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr); 106 107 /* free the SPAI matrices */ 108 sp_free_matrix(ispai->B); 109 sp_free_matrix(ispai->M); 110 PetscFunctionReturn(0); 111 } 112 113 /**********************************************************************/ 114 115 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 116 { 117 PC_SPAI *ispai = (PC_SPAI*)pc->data; 118 PetscErrorCode ierr; 119 120 PetscFunctionBegin; 121 /* Now using PETSc's multiply */ 122 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 static PetscErrorCode PCMatApply_SPAI(PC pc,Mat X,Mat Y) 127 { 128 PC_SPAI *ispai = (PC_SPAI*)pc->data; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* Now using PETSc's multiply */ 133 ierr = MatMatMult(ispai->PM,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 /**********************************************************************/ 138 139 static PetscErrorCode PCDestroy_SPAI(PC pc) 140 { 141 PetscErrorCode ierr; 142 PC_SPAI *ispai = (PC_SPAI*)pc->data; 143 144 PetscFunctionBegin; 145 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 146 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRMPI(ierr); 147 ierr = PetscFree(pc->data);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 /**********************************************************************/ 152 153 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 154 { 155 PC_SPAI *ispai = (PC_SPAI*)pc->data; 156 PetscErrorCode ierr; 157 PetscBool iascii; 158 159 PetscFunctionBegin; 160 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 161 if (iascii) { 162 ierr = PetscViewerASCIIPrintf(viewer," epsilon %g\n", (double)ispai->epsilon);CHKERRQ(ierr); 163 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 164 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 165 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 166 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 167 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 168 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 169 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 175 { 176 PC_SPAI *ispai = (PC_SPAI*)pc->data; 177 178 PetscFunctionBegin; 179 ispai->epsilon = epsilon1; 180 PetscFunctionReturn(0); 181 } 182 183 /**********************************************************************/ 184 185 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 186 { 187 PC_SPAI *ispai = (PC_SPAI*)pc->data; 188 189 PetscFunctionBegin; 190 ispai->nbsteps = nbsteps1; 191 PetscFunctionReturn(0); 192 } 193 194 /**********************************************************************/ 195 196 /* added 1/7/99 g.h. */ 197 static PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 198 { 199 PC_SPAI *ispai = (PC_SPAI*)pc->data; 200 201 PetscFunctionBegin; 202 ispai->max = max1; 203 PetscFunctionReturn(0); 204 } 205 206 /**********************************************************************/ 207 208 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 209 { 210 PC_SPAI *ispai = (PC_SPAI*)pc->data; 211 212 PetscFunctionBegin; 213 ispai->maxnew = maxnew1; 214 PetscFunctionReturn(0); 215 } 216 217 /**********************************************************************/ 218 219 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 220 { 221 PC_SPAI *ispai = (PC_SPAI*)pc->data; 222 223 PetscFunctionBegin; 224 ispai->block_size = block_size1; 225 PetscFunctionReturn(0); 226 } 227 228 /**********************************************************************/ 229 230 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 231 { 232 PC_SPAI *ispai = (PC_SPAI*)pc->data; 233 234 PetscFunctionBegin; 235 ispai->cache_size = cache_size; 236 PetscFunctionReturn(0); 237 } 238 239 /**********************************************************************/ 240 241 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 242 { 243 PC_SPAI *ispai = (PC_SPAI*)pc->data; 244 245 PetscFunctionBegin; 246 ispai->verbose = verbose; 247 PetscFunctionReturn(0); 248 } 249 250 /**********************************************************************/ 251 252 static PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 253 { 254 PC_SPAI *ispai = (PC_SPAI*)pc->data; 255 256 PetscFunctionBegin; 257 ispai->sp = sp; 258 PetscFunctionReturn(0); 259 } 260 261 /* -------------------------------------------------------------------*/ 262 263 /*@ 264 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 265 266 Input Parameters: 267 + pc - the preconditioner 268 - eps - epsilon (default .4) 269 270 Notes: 271 Espilon must be between 0 and 1. It controls the 272 quality of the approximation of M to the inverse of 273 A. Higher values of epsilon lead to more work, more 274 fill, and usually better preconditioners. In many 275 cases the best choice of epsilon is the one that 276 divides the total solution time equally between the 277 preconditioner and the solver. 278 279 Level: intermediate 280 281 .seealso: PCSPAI, PCSetType() 282 @*/ 283 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 284 { 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 /**********************************************************************/ 293 294 /*@ 295 PCSPAISetNBSteps - set maximum number of improvement steps per row in 296 the SPAI preconditioner 297 298 Input Parameters: 299 + pc - the preconditioner 300 - n - number of steps (default 5) 301 302 Notes: 303 SPAI constructs to approximation to every column of 304 the exact inverse of A in a series of improvement 305 steps. The quality of the approximation is determined 306 by epsilon. If an approximation achieving an accuracy 307 of epsilon is not obtained after ns steps, SPAI simply 308 uses the best approximation constructed so far. 309 310 Level: intermediate 311 312 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 313 @*/ 314 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 315 { 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 /**********************************************************************/ 324 325 /* added 1/7/99 g.h. */ 326 /*@ 327 PCSPAISetMax - set the size of various working buffers in 328 the SPAI preconditioner 329 330 Input Parameters: 331 + pc - the preconditioner 332 - n - size (default is 5000) 333 334 Level: intermediate 335 336 .seealso: PCSPAI, PCSetType() 337 @*/ 338 PetscErrorCode PCSPAISetMax(PC pc,int max1) 339 { 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 /**********************************************************************/ 348 349 /*@ 350 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 351 in SPAI preconditioner 352 353 Input Parameters: 354 + pc - the preconditioner 355 - n - maximum number (default 5) 356 357 Level: intermediate 358 359 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 360 @*/ 361 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 362 { 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 /**********************************************************************/ 371 372 /*@ 373 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 374 375 Input Parameters: 376 + pc - the preconditioner 377 - n - block size (default 1) 378 379 Notes: 380 A block 381 size of 1 treats A as a matrix of scalar elements. A 382 block size of s > 1 treats A as a matrix of sxs 383 blocks. A block size of 0 treats A as a matrix with 384 variable sized blocks, which are determined by 385 searching for dense square diagonal blocks in A. 386 This can be very effective for finite-element 387 matrices. 388 389 SPAI will convert A to block form, use a block 390 version of the preconditioner algorithm, and then 391 convert the result back to scalar form. 392 393 In many cases the a block-size parameter other than 1 394 can lead to very significant improvement in 395 performance. 396 397 398 Level: intermediate 399 400 .seealso: PCSPAI, PCSetType() 401 @*/ 402 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 403 { 404 PetscErrorCode ierr; 405 406 PetscFunctionBegin; 407 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 /**********************************************************************/ 412 413 /*@ 414 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 415 416 Input Parameters: 417 + pc - the preconditioner 418 - n - cache size {0,1,2,3,4,5} (default 5) 419 420 Notes: 421 SPAI uses a hash table to cache messages and avoid 422 redundant communication. If suggest always using 423 5. This parameter is irrelevant in the serial 424 version. 425 426 Level: intermediate 427 428 .seealso: PCSPAI, PCSetType() 429 @*/ 430 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 431 { 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 436 PetscFunctionReturn(0); 437 } 438 439 /**********************************************************************/ 440 441 /*@ 442 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 443 444 Input Parameters: 445 + pc - the preconditioner 446 - n - level (default 1) 447 448 Notes: 449 print parameters, timings and matrix statistics 450 451 Level: intermediate 452 453 .seealso: PCSPAI, PCSetType() 454 @*/ 455 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 456 { 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 /**********************************************************************/ 465 466 /*@ 467 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 468 469 Input Parameters: 470 + pc - the preconditioner 471 - n - 0 or 1 472 473 Notes: 474 If A has a symmetric nonzero pattern use -sp 1 to 475 improve performance by eliminating some communication 476 in the parallel version. Even if A does not have a 477 symmetric nonzero pattern -sp 1 may well lead to good 478 results, but the code will not follow the published 479 SPAI algorithm exactly. 480 481 482 Level: intermediate 483 484 .seealso: PCSPAI, PCSetType() 485 @*/ 486 PetscErrorCode PCSPAISetSp(PC pc,int sp) 487 { 488 PetscErrorCode ierr; 489 490 PetscFunctionBegin; 491 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 /**********************************************************************/ 496 497 /**********************************************************************/ 498 499 static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc) 500 { 501 PC_SPAI *ispai = (PC_SPAI*)pc->data; 502 PetscErrorCode ierr; 503 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 504 double epsilon1; 505 PetscBool flg; 506 507 PetscFunctionBegin; 508 ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr); 509 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 510 if (flg) { 511 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 512 } 513 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 514 if (flg) { 515 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 516 } 517 /* added 1/7/99 g.h. */ 518 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 519 if (flg) { 520 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 521 } 522 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 523 if (flg) { 524 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 525 } 526 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 527 if (flg) { 528 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 529 } 530 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 531 if (flg) { 532 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 533 } 534 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 535 if (flg) { 536 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 537 } 538 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 539 if (flg) { 540 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 541 } 542 ierr = PetscOptionsTail();CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 /**********************************************************************/ 547 548 /*MC 549 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 550 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 551 552 Options Database Keys: 553 + -pc_spai_epsilon <eps> - set tolerance 554 . -pc_spai_nbstep <n> - set nbsteps 555 . -pc_spai_max <m> - set max 556 . -pc_spai_max_new <m> - set maxnew 557 . -pc_spai_block_size <n> - set block size 558 . -pc_spai_cache_size <n> - set cache size 559 . -pc_spai_sp <m> - set sp 560 - -pc_spai_set_verbose <true,false> - verbose output 561 562 Notes: 563 This only works with AIJ matrices. 564 565 Level: beginner 566 567 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 568 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 569 PCSPAISetVerbose(), PCSPAISetSp() 570 M*/ 571 572 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 573 { 574 PC_SPAI *ispai; 575 PetscErrorCode ierr; 576 577 PetscFunctionBegin; 578 ierr = PetscNewLog(pc,&ispai);CHKERRQ(ierr); 579 pc->data = ispai; 580 581 pc->ops->destroy = PCDestroy_SPAI; 582 pc->ops->apply = PCApply_SPAI; 583 pc->ops->matapply = PCMatApply_SPAI; 584 pc->ops->applyrichardson = 0; 585 pc->ops->setup = PCSetUp_SPAI; 586 pc->ops->view = PCView_SPAI; 587 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 588 589 ispai->epsilon = .4; 590 ispai->nbsteps = 5; 591 ispai->max = 5000; 592 ispai->maxnew = 5; 593 ispai->block_size = 1; 594 ispai->cache_size = 5; 595 ispai->verbose = 0; 596 597 ispai->sp = 1; 598 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr); 599 600 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 604 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 /**********************************************************************/ 612 613 /* 614 Converts from a PETSc matrix to an SPAI matrix 615 */ 616 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 617 { 618 matrix *M; 619 int i,j,col; 620 int row_indx; 621 int len,pe,local_indx,start_indx; 622 int *mapping; 623 PetscErrorCode ierr; 624 const int *cols; 625 const double *vals; 626 int n,mnl,nnl,nz,rstart,rend; 627 PetscMPIInt size,rank; 628 struct compressed_lines *rows; 629 630 PetscFunctionBegin; 631 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 632 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 633 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 634 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 635 636 /* 637 not sure why a barrier is required. commenting out 638 ierr = MPI_Barrier(comm);CHKERRMPI(ierr); 639 */ 640 641 M = new_matrix((SPAI_Comm)comm); 642 643 M->n = n; 644 M->bs = 1; 645 M->max_block_size = 1; 646 647 M->mnls = (int*)malloc(sizeof(int)*size); 648 M->start_indices = (int*)malloc(sizeof(int)*size); 649 M->pe = (int*)malloc(sizeof(int)*n); 650 M->block_sizes = (int*)malloc(sizeof(int)*n); 651 for (i=0; i<n; i++) M->block_sizes[i] = 1; 652 653 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRMPI(ierr); 654 655 M->start_indices[0] = 0; 656 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 657 658 M->mnl = M->mnls[M->myid]; 659 M->my_start_index = M->start_indices[M->myid]; 660 661 for (i=0; i<size; i++) { 662 start_indx = M->start_indices[i]; 663 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 664 } 665 666 if (AT) { 667 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 668 } else { 669 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 670 } 671 672 rows = M->lines; 673 674 /* Determine the mapping from global indices to pointers */ 675 ierr = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr); 676 pe = 0; 677 local_indx = 0; 678 for (i=0; i<M->n; i++) { 679 if (local_indx >= M->mnls[pe]) { 680 pe++; 681 local_indx = 0; 682 } 683 mapping[i] = local_indx + M->start_indices[pe]; 684 local_indx++; 685 } 686 687 /*********************************************************/ 688 /************** Set up the row structure *****************/ 689 /*********************************************************/ 690 691 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 692 for (i=rstart; i<rend; i++) { 693 row_indx = i - rstart; 694 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 695 /* allocate buffers */ 696 rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 697 rows->A[row_indx] = (double*)malloc(nz*sizeof(double)); 698 /* copy the matrix */ 699 for (j=0; j<nz; j++) { 700 col = cols[j]; 701 len = rows->len[row_indx]++; 702 703 rows->ptrs[row_indx][len] = mapping[col]; 704 rows->A[row_indx][len] = vals[j]; 705 } 706 rows->slen[row_indx] = rows->len[row_indx]; 707 708 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 709 } 710 711 712 /************************************************************/ 713 /************** Set up the column structure *****************/ 714 /*********************************************************/ 715 716 if (AT) { 717 718 for (i=rstart; i<rend; i++) { 719 row_indx = i - rstart; 720 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 721 /* allocate buffers */ 722 rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 723 /* copy the matrix (i.e., the structure) */ 724 for (j=0; j<nz; j++) { 725 col = cols[j]; 726 len = rows->rlen[row_indx]++; 727 728 rows->rptrs[row_indx][len] = mapping[col]; 729 } 730 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 731 } 732 } 733 734 ierr = PetscFree(mapping);CHKERRQ(ierr); 735 736 order_pointers(M); 737 M->maxnz = calc_maxnz(M); 738 *B = M; 739 PetscFunctionReturn(0); 740 } 741 742 /**********************************************************************/ 743 744 /* 745 Converts from an SPAI matrix B to a PETSc matrix PB. 746 This assumes that the SPAI matrix B is stored in 747 COMPRESSED-ROW format. 748 */ 749 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 750 { 751 PetscMPIInt size,rank; 752 PetscErrorCode ierr; 753 int m,n,M,N; 754 int d_nz,o_nz; 755 int *d_nnz,*o_nnz; 756 int i,k,global_row,global_col,first_diag_col,last_diag_col; 757 PetscScalar val; 758 759 PetscFunctionBegin; 760 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 761 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 762 763 m = n = B->mnls[rank]; 764 d_nz = o_nz = 0; 765 766 /* Determine preallocation for MatCreateMPIAIJ */ 767 ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr); 768 ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr); 769 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 770 first_diag_col = B->start_indices[rank]; 771 last_diag_col = first_diag_col + B->mnls[rank]; 772 for (i=0; i<B->mnls[rank]; i++) { 773 for (k=0; k<B->lines->len[i]; k++) { 774 global_col = B->lines->ptrs[i][k]; 775 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 776 else o_nnz[i]++; 777 } 778 } 779 780 M = N = B->n; 781 /* Here we only know how to create AIJ format */ 782 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 783 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 784 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 785 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 786 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 787 788 for (i=0; i<B->mnls[rank]; i++) { 789 global_row = B->start_indices[rank]+i; 790 for (k=0; k<B->lines->len[i]; k++) { 791 global_col = B->lines->ptrs[i][k]; 792 793 val = B->lines->A[i][k]; 794 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 795 } 796 } 797 798 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 799 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 800 801 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 802 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 803 PetscFunctionReturn(0); 804 } 805 806 /**********************************************************************/ 807 808 /* 809 Converts from an SPAI vector v to a PETSc vec Pv. 810 */ 811 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 812 { 813 PetscErrorCode ierr; 814 PetscMPIInt size,rank; 815 int m,M,i,*mnls,*start_indices,*global_indices; 816 817 PetscFunctionBegin; 818 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 819 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 820 821 m = v->mnl; 822 M = v->n; 823 824 825 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 826 827 ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr); 828 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRMPI(ierr); 829 830 ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr); 831 832 start_indices[0] = 0; 833 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 834 835 ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr); 836 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 837 838 ierr = PetscFree(mnls);CHKERRQ(ierr); 839 ierr = PetscFree(start_indices);CHKERRQ(ierr); 840 841 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 842 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 843 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 844 845 ierr = PetscFree(global_indices);CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848