1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 23 #include <../src/ksp/pc/impls/spai/petscspai.h> 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include <spai.h> 31 #include <matrix.h> 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 38 39 typedef struct { 40 41 matrix *B; /* matrix in SPAI format */ 42 matrix *BT; /* transpose of matrix in SPAI format */ 43 matrix *M; /* the approximate inverse in SPAI format */ 44 45 Mat PM; /* the approximate inverse PETSc format */ 46 47 double epsilon; /* tolerance */ 48 int nbsteps; /* max number of "improvement" steps per line */ 49 int max; /* max dimensions of is_I, q, etc. */ 50 int maxnew; /* max number of new entries per step */ 51 int block_size; /* constant block size */ 52 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 53 int verbose; /* SPAI prints timing and statistics */ 54 55 int sp; /* symmetric nonzero pattern */ 56 MPI_Comm comm_spai; /* communicator to be used with spai */ 57 } PC_SPAI; 58 59 /**********************************************************************/ 60 61 static PetscErrorCode PCSetUp_SPAI(PC pc) 62 { 63 PC_SPAI *ispai = (PC_SPAI*)pc->data; 64 PetscErrorCode ierr; 65 Mat AT; 66 67 PetscFunctionBegin; 68 init_SPAI(); 69 70 if (ispai->sp) { 71 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 72 } else { 73 /* Use the transpose to get the column nonzero structure. */ 74 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 75 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 76 ierr = MatDestroy(&AT);CHKERRQ(ierr); 77 } 78 79 /* Destroy the transpose */ 80 /* Don't know how to do it. PETSc developers? */ 81 82 /* construct SPAI preconditioner */ 83 /* FILE *messages */ /* file for warning messages */ 84 /* double epsilon */ /* tolerance */ 85 /* int nbsteps */ /* max number of "improvement" steps per line */ 86 /* int max */ /* max dimensions of is_I, q, etc. */ 87 /* int maxnew */ /* max number of new entries per step */ 88 /* int block_size */ /* block_size == 1 specifies scalar elments 89 block_size == n specifies nxn constant-block elements 90 block_size == 0 specifies variable-block elements */ 91 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 92 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 93 verbose == 1 prints timing and matrix statistics */ 94 95 ierr = bspai(ispai->B,&ispai->M, 96 stdout, 97 ispai->epsilon, 98 ispai->nbsteps, 99 ispai->max, 100 ispai->maxnew, 101 ispai->block_size, 102 ispai->cache_size, 103 ispai->verbose);CHKERRQ(ierr); 104 105 ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr); 106 107 /* free the SPAI matrices */ 108 sp_free_matrix(ispai->B); 109 sp_free_matrix(ispai->M); 110 PetscFunctionReturn(0); 111 } 112 113 /**********************************************************************/ 114 115 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 116 { 117 PC_SPAI *ispai = (PC_SPAI*)pc->data; 118 PetscErrorCode ierr; 119 120 PetscFunctionBegin; 121 /* Now using PETSc's multiply */ 122 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 /**********************************************************************/ 127 128 static PetscErrorCode PCDestroy_SPAI(PC pc) 129 { 130 PetscErrorCode ierr; 131 PC_SPAI *ispai = (PC_SPAI*)pc->data; 132 133 PetscFunctionBegin; 134 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 135 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 136 ierr = PetscFree(pc->data);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 /**********************************************************************/ 141 142 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 143 { 144 PC_SPAI *ispai = (PC_SPAI*)pc->data; 145 PetscErrorCode ierr; 146 PetscBool iascii; 147 148 PetscFunctionBegin; 149 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 150 if (iascii) { 151 ierr = PetscViewerASCIIPrintf(viewer," epsilon %g\n", (double)ispai->epsilon);CHKERRQ(ierr); 152 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 153 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 154 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 155 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 156 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 157 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 158 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 159 } 160 PetscFunctionReturn(0); 161 } 162 163 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 164 { 165 PC_SPAI *ispai = (PC_SPAI*)pc->data; 166 167 PetscFunctionBegin; 168 ispai->epsilon = epsilon1; 169 PetscFunctionReturn(0); 170 } 171 172 /**********************************************************************/ 173 174 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 175 { 176 PC_SPAI *ispai = (PC_SPAI*)pc->data; 177 178 PetscFunctionBegin; 179 ispai->nbsteps = nbsteps1; 180 PetscFunctionReturn(0); 181 } 182 183 /**********************************************************************/ 184 185 /* added 1/7/99 g.h. */ 186 static PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 187 { 188 PC_SPAI *ispai = (PC_SPAI*)pc->data; 189 190 PetscFunctionBegin; 191 ispai->max = max1; 192 PetscFunctionReturn(0); 193 } 194 195 /**********************************************************************/ 196 197 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 198 { 199 PC_SPAI *ispai = (PC_SPAI*)pc->data; 200 201 PetscFunctionBegin; 202 ispai->maxnew = maxnew1; 203 PetscFunctionReturn(0); 204 } 205 206 /**********************************************************************/ 207 208 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 209 { 210 PC_SPAI *ispai = (PC_SPAI*)pc->data; 211 212 PetscFunctionBegin; 213 ispai->block_size = block_size1; 214 PetscFunctionReturn(0); 215 } 216 217 /**********************************************************************/ 218 219 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 220 { 221 PC_SPAI *ispai = (PC_SPAI*)pc->data; 222 223 PetscFunctionBegin; 224 ispai->cache_size = cache_size; 225 PetscFunctionReturn(0); 226 } 227 228 /**********************************************************************/ 229 230 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 231 { 232 PC_SPAI *ispai = (PC_SPAI*)pc->data; 233 234 PetscFunctionBegin; 235 ispai->verbose = verbose; 236 PetscFunctionReturn(0); 237 } 238 239 /**********************************************************************/ 240 241 static PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 242 { 243 PC_SPAI *ispai = (PC_SPAI*)pc->data; 244 245 PetscFunctionBegin; 246 ispai->sp = sp; 247 PetscFunctionReturn(0); 248 } 249 250 /* -------------------------------------------------------------------*/ 251 252 /*@ 253 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 254 255 Input Parameters: 256 + pc - the preconditioner 257 - eps - epsilon (default .4) 258 259 Notes: 260 Espilon must be between 0 and 1. It controls the 261 quality of the approximation of M to the inverse of 262 A. Higher values of epsilon lead to more work, more 263 fill, and usually better preconditioners. In many 264 cases the best choice of epsilon is the one that 265 divides the total solution time equally between the 266 preconditioner and the solver. 267 268 Level: intermediate 269 270 .seealso: PCSPAI, PCSetType() 271 @*/ 272 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 273 { 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 /**********************************************************************/ 282 283 /*@ 284 PCSPAISetNBSteps - set maximum number of improvement steps per row in 285 the SPAI preconditioner 286 287 Input Parameters: 288 + pc - the preconditioner 289 - n - number of steps (default 5) 290 291 Notes: 292 SPAI constructs to approximation to every column of 293 the exact inverse of A in a series of improvement 294 steps. The quality of the approximation is determined 295 by epsilon. If an approximation achieving an accuracy 296 of epsilon is not obtained after ns steps, SPAI simply 297 uses the best approximation constructed so far. 298 299 Level: intermediate 300 301 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 302 @*/ 303 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 304 { 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 /**********************************************************************/ 313 314 /* added 1/7/99 g.h. */ 315 /*@ 316 PCSPAISetMax - set the size of various working buffers in 317 the SPAI preconditioner 318 319 Input Parameters: 320 + pc - the preconditioner 321 - n - size (default is 5000) 322 323 Level: intermediate 324 325 .seealso: PCSPAI, PCSetType() 326 @*/ 327 PetscErrorCode PCSPAISetMax(PC pc,int max1) 328 { 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /**********************************************************************/ 337 338 /*@ 339 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 340 in SPAI preconditioner 341 342 Input Parameters: 343 + pc - the preconditioner 344 - n - maximum number (default 5) 345 346 Level: intermediate 347 348 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 349 @*/ 350 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 351 { 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 356 PetscFunctionReturn(0); 357 } 358 359 /**********************************************************************/ 360 361 /*@ 362 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 363 364 Input Parameters: 365 + pc - the preconditioner 366 - n - block size (default 1) 367 368 Notes: 369 A block 370 size of 1 treats A as a matrix of scalar elements. A 371 block size of s > 1 treats A as a matrix of sxs 372 blocks. A block size of 0 treats A as a matrix with 373 variable sized blocks, which are determined by 374 searching for dense square diagonal blocks in A. 375 This can be very effective for finite-element 376 matrices. 377 378 SPAI will convert A to block form, use a block 379 version of the preconditioner algorithm, and then 380 convert the result back to scalar form. 381 382 In many cases the a block-size parameter other than 1 383 can lead to very significant improvement in 384 performance. 385 386 387 Level: intermediate 388 389 .seealso: PCSPAI, PCSetType() 390 @*/ 391 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 392 { 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 /**********************************************************************/ 401 402 /*@ 403 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 404 405 Input Parameters: 406 + pc - the preconditioner 407 - n - cache size {0,1,2,3,4,5} (default 5) 408 409 Notes: 410 SPAI uses a hash table to cache messages and avoid 411 redundant communication. If suggest always using 412 5. This parameter is irrelevant in the serial 413 version. 414 415 Level: intermediate 416 417 .seealso: PCSPAI, PCSetType() 418 @*/ 419 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 420 { 421 PetscErrorCode ierr; 422 423 PetscFunctionBegin; 424 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 /**********************************************************************/ 429 430 /*@ 431 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 432 433 Input Parameters: 434 + pc - the preconditioner 435 - n - level (default 1) 436 437 Notes: 438 print parameters, timings and matrix statistics 439 440 Level: intermediate 441 442 .seealso: PCSPAI, PCSetType() 443 @*/ 444 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 445 { 446 PetscErrorCode ierr; 447 448 PetscFunctionBegin; 449 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 /**********************************************************************/ 454 455 /*@ 456 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 457 458 Input Parameters: 459 + pc - the preconditioner 460 - n - 0 or 1 461 462 Notes: 463 If A has a symmetric nonzero pattern use -sp 1 to 464 improve performance by eliminating some communication 465 in the parallel version. Even if A does not have a 466 symmetric nonzero pattern -sp 1 may well lead to good 467 results, but the code will not follow the published 468 SPAI algorithm exactly. 469 470 471 Level: intermediate 472 473 .seealso: PCSPAI, PCSetType() 474 @*/ 475 PetscErrorCode PCSPAISetSp(PC pc,int sp) 476 { 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 /**********************************************************************/ 485 486 /**********************************************************************/ 487 488 static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc) 489 { 490 PC_SPAI *ispai = (PC_SPAI*)pc->data; 491 PetscErrorCode ierr; 492 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 493 double epsilon1; 494 PetscBool flg; 495 496 PetscFunctionBegin; 497 ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr); 498 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 499 if (flg) { 500 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 501 } 502 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 503 if (flg) { 504 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 505 } 506 /* added 1/7/99 g.h. */ 507 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 508 if (flg) { 509 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 510 } 511 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 512 if (flg) { 513 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 514 } 515 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 516 if (flg) { 517 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 518 } 519 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 520 if (flg) { 521 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 522 } 523 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 524 if (flg) { 525 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 526 } 527 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 528 if (flg) { 529 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 530 } 531 ierr = PetscOptionsTail();CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 /**********************************************************************/ 536 537 /*MC 538 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 539 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 540 541 Options Database Keys: 542 + -pc_spai_epsilon <eps> - set tolerance 543 . -pc_spai_nbstep <n> - set nbsteps 544 . -pc_spai_max <m> - set max 545 . -pc_spai_max_new <m> - set maxnew 546 . -pc_spai_block_size <n> - set block size 547 . -pc_spai_cache_size <n> - set cache size 548 . -pc_spai_sp <m> - set sp 549 - -pc_spai_set_verbose <true,false> - verbose output 550 551 Notes: 552 This only works with AIJ matrices. 553 554 Level: beginner 555 556 Concepts: approximate inverse 557 558 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 559 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 560 PCSPAISetVerbose(), PCSPAISetSp() 561 M*/ 562 563 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 564 { 565 PC_SPAI *ispai; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = PetscNewLog(pc,&ispai);CHKERRQ(ierr); 570 pc->data = ispai; 571 572 pc->ops->destroy = PCDestroy_SPAI; 573 pc->ops->apply = PCApply_SPAI; 574 pc->ops->applyrichardson = 0; 575 pc->ops->setup = PCSetUp_SPAI; 576 pc->ops->view = PCView_SPAI; 577 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 578 579 ispai->epsilon = .4; 580 ispai->nbsteps = 5; 581 ispai->max = 5000; 582 ispai->maxnew = 5; 583 ispai->block_size = 1; 584 ispai->cache_size = 5; 585 ispai->verbose = 0; 586 587 ispai->sp = 1; 588 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr); 589 590 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr); 593 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 594 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 595 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 596 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr); 598 PetscFunctionReturn(0); 599 } 600 601 /**********************************************************************/ 602 603 /* 604 Converts from a PETSc matrix to an SPAI matrix 605 */ 606 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 607 { 608 matrix *M; 609 int i,j,col; 610 int row_indx; 611 int len,pe,local_indx,start_indx; 612 int *mapping; 613 PetscErrorCode ierr; 614 const int *cols; 615 const double *vals; 616 int n,mnl,nnl,nz,rstart,rend; 617 PetscMPIInt size,rank; 618 struct compressed_lines *rows; 619 620 PetscFunctionBegin; 621 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 622 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 623 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 624 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 625 626 /* 627 not sure why a barrier is required. commenting out 628 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 629 */ 630 631 M = new_matrix((SPAI_Comm)comm); 632 633 M->n = n; 634 M->bs = 1; 635 M->max_block_size = 1; 636 637 M->mnls = (int*)malloc(sizeof(int)*size); 638 M->start_indices = (int*)malloc(sizeof(int)*size); 639 M->pe = (int*)malloc(sizeof(int)*n); 640 M->block_sizes = (int*)malloc(sizeof(int)*n); 641 for (i=0; i<n; i++) M->block_sizes[i] = 1; 642 643 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 644 645 M->start_indices[0] = 0; 646 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 647 648 M->mnl = M->mnls[M->myid]; 649 M->my_start_index = M->start_indices[M->myid]; 650 651 for (i=0; i<size; i++) { 652 start_indx = M->start_indices[i]; 653 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 654 } 655 656 if (AT) { 657 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 658 } else { 659 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 660 } 661 662 rows = M->lines; 663 664 /* Determine the mapping from global indices to pointers */ 665 ierr = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr); 666 pe = 0; 667 local_indx = 0; 668 for (i=0; i<M->n; i++) { 669 if (local_indx >= M->mnls[pe]) { 670 pe++; 671 local_indx = 0; 672 } 673 mapping[i] = local_indx + M->start_indices[pe]; 674 local_indx++; 675 } 676 677 /*********************************************************/ 678 /************** Set up the row structure *****************/ 679 /*********************************************************/ 680 681 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 682 for (i=rstart; i<rend; i++) { 683 row_indx = i - rstart; 684 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 685 /* allocate buffers */ 686 rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 687 rows->A[row_indx] = (double*)malloc(nz*sizeof(double)); 688 /* copy the matrix */ 689 for (j=0; j<nz; j++) { 690 col = cols[j]; 691 len = rows->len[row_indx]++; 692 693 rows->ptrs[row_indx][len] = mapping[col]; 694 rows->A[row_indx][len] = vals[j]; 695 } 696 rows->slen[row_indx] = rows->len[row_indx]; 697 698 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 699 } 700 701 702 /************************************************************/ 703 /************** Set up the column structure *****************/ 704 /*********************************************************/ 705 706 if (AT) { 707 708 for (i=rstart; i<rend; i++) { 709 row_indx = i - rstart; 710 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 711 /* allocate buffers */ 712 rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 713 /* copy the matrix (i.e., the structure) */ 714 for (j=0; j<nz; j++) { 715 col = cols[j]; 716 len = rows->rlen[row_indx]++; 717 718 rows->rptrs[row_indx][len] = mapping[col]; 719 } 720 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 721 } 722 } 723 724 ierr = PetscFree(mapping);CHKERRQ(ierr); 725 726 order_pointers(M); 727 M->maxnz = calc_maxnz(M); 728 *B = M; 729 PetscFunctionReturn(0); 730 } 731 732 /**********************************************************************/ 733 734 /* 735 Converts from an SPAI matrix B to a PETSc matrix PB. 736 This assumes that the SPAI matrix B is stored in 737 COMPRESSED-ROW format. 738 */ 739 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 740 { 741 PetscMPIInt size,rank; 742 PetscErrorCode ierr; 743 int m,n,M,N; 744 int d_nz,o_nz; 745 int *d_nnz,*o_nnz; 746 int i,k,global_row,global_col,first_diag_col,last_diag_col; 747 PetscScalar val; 748 749 PetscFunctionBegin; 750 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 751 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 752 753 m = n = B->mnls[rank]; 754 d_nz = o_nz = 0; 755 756 /* Determine preallocation for MatCreateMPIAIJ */ 757 ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr); 758 ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr); 759 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 760 first_diag_col = B->start_indices[rank]; 761 last_diag_col = first_diag_col + B->mnls[rank]; 762 for (i=0; i<B->mnls[rank]; i++) { 763 for (k=0; k<B->lines->len[i]; k++) { 764 global_col = B->lines->ptrs[i][k]; 765 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 766 else o_nnz[i]++; 767 } 768 } 769 770 M = N = B->n; 771 /* Here we only know how to create AIJ format */ 772 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 773 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 774 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 775 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 776 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 777 778 for (i=0; i<B->mnls[rank]; i++) { 779 global_row = B->start_indices[rank]+i; 780 for (k=0; k<B->lines->len[i]; k++) { 781 global_col = B->lines->ptrs[i][k]; 782 783 val = B->lines->A[i][k]; 784 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 785 } 786 } 787 788 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 789 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 790 791 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 792 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 /**********************************************************************/ 797 798 /* 799 Converts from an SPAI vector v to a PETSc vec Pv. 800 */ 801 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 802 { 803 PetscErrorCode ierr; 804 PetscMPIInt size,rank; 805 int m,M,i,*mnls,*start_indices,*global_indices; 806 807 PetscFunctionBegin; 808 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 809 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 810 811 m = v->mnl; 812 M = v->n; 813 814 815 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 816 817 ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr); 818 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 819 820 ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr); 821 822 start_indices[0] = 0; 823 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 824 825 ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr); 826 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 827 828 ierr = PetscFree(mnls);CHKERRQ(ierr); 829 ierr = PetscFree(start_indices);CHKERRQ(ierr); 830 831 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 832 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 833 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 834 835 ierr = PetscFree(global_indices);CHKERRQ(ierr); 836 PetscFunctionReturn(0); 837 } 838 839 840 841 842