
/*
   3/99 Modified by Stephen Barnard to support SPAI version 3.0
*/

/*
   Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
   Code written by Stephen Barnard.

   Note: there is some BAD memory bleeding below!

   This code needs work

   1) get rid of all memory bleeding
   2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
      rather than having the sp flag for PC_SPAI
   3) fix to set the block size based on the matrix block size

*/
#define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */

#include <petsc/private/pcimpl.h>        /*I "petscpc.h" I*/
#include <../src/ksp/pc/impls/spai/petscspai.h>

/*
    These are the SPAI include files
*/
EXTERN_C_BEGIN
#define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
#include <spai.h>
#include <matrix.h>
EXTERN_C_END

extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
extern PetscErrorCode MM_to_PETSC(char*,char*,char*);

/*
   Private preconditioner context for PCSPAI: the SPAI-format operands,
   the computed approximate inverse, and the user-settable SPAI parameters
   (passed to bspai() in PCSetUp_SPAI).
*/
typedef struct {

  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM;     /* the approximate inverse PETSc format */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;

/**********************************************************************/

/*
   PCSetUp_SPAI - Converts pc->pmat to SPAI format, runs the SPAI
   algorithm (bspai) to build the approximate inverse, and converts the
   result back into the PETSc matrix ispai->PM used by PCApply_SPAI.
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  Mat            AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix supplies its own column structure */
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
  } else {
    /* Use the transpose to get the column nonzero structure. */
    ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
    ierr = MatDestroy(&AT);CHKERRQ(ierr);
  }

  /* construct SPAI preconditioner; the bspai() arguments are: */
  /* FILE *messages */   /* file for warning messages */
  /* double epsilon */   /* tolerance */
  /* int nbsteps */      /* max number of "improvement" steps per line */
  /* int max */          /* max dimensions of is_I, q, etc. */
  /* int maxnew */       /* max number of new entries per step */
  /* int block_size */   /* block_size == 1 specifies scalar elements
                            block_size == n specifies nxn constant-block elements
                            block_size == 0 specifies variable-block elements */
  /* int cache_size */   /* one of (1,2,3,4,5,6) indicating size of cache.
                            cache_size == 0 indicates no caching */
  /* int verbose */      /* verbose == 0 specifies that SPAI is silent
                            verbose == 1 prints timing and matrix statistics */

  ierr = bspai(ispai->B,&ispai->M,
               stdout,
               ispai->epsilon,
               ispai->nbsteps,
               ispai->max,
               ispai->maxnew,
               ispai->block_size,
               ispai->cache_size,
               ispai->verbose);CHKERRQ(ierr);

  ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   PCApply_SPAI - Applies the preconditioner: y = PM*xx, a single
   multiply by the explicitly stored approximate inverse.
*/
static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   PCDestroy_SPAI - Frees the approximate inverse, the duplicated
   communicator, and the context itself.
*/
static PetscErrorCode PCDestroy_SPAI(PC pc)
{
  PetscErrorCode ierr;
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   PCView_SPAI - Prints the SPAI parameter settings (ASCII viewers only).
*/
static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerASCIIPrintf(viewer," epsilon %g\n", (double)ispai->epsilon);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Backend setter composed as "PCSPAISetEpsilon_C"; see PCSPAISetEpsilon(). */
static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->epsilon = epsilon1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* Backend setter composed as "PCSPAISetNBSteps_C"; see PCSPAISetNBSteps(). */
static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->nbsteps = nbsteps1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h.
 */
/* Backend setter composed as "PCSPAISetMax_C"; see PCSPAISetMax(). */
static PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->max = max1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* Backend setter composed as "PCSPAISetMaxNew_C"; see PCSPAISetMaxNew(). */
static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->maxnew = maxnew1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* Backend setter composed as "PCSPAISetBlockSize_C"; see PCSPAISetBlockSize(). */
static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->block_size = block_size1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* Backend setter composed as "PCSPAISetCacheSize_C"; see PCSPAISetCacheSize(). */
static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->cache_size = cache_size;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* Backend setter composed as "PCSPAISetVerbose_C"; see PCSPAISetVerbose(). */
static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->verbose = verbose;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* Backend setter composed as "PCSPAISetSp_C"; see PCSPAISetSp(). */
static PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp)
{
  PC_SPAI *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ispai->sp = sp;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/

/*@
   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  eps - epsilon (default .4)

   Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

   Level: intermediate

.seealso: PCSPAI, PCSetType()
  @*/
PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - number of steps (default 5)

   Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after ns steps, SPAI simply
                 uses the best approximation constructed so far.

   Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h.
 */
/*@
   PCSPAISetMax - set the size of various working buffers in
        the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - size (default is 5000)

   Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - maximum number (default 5)

   Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetBlockSize - set the block size for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - block size (default 1)

   Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.

   Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - cache size {0,1,2,3,4,5} (default 5)

   Notes:    SPAI uses a hash table to cache messages and avoid
             redundant communication. We suggest always using
             5. This parameter is irrelevant in the serial
             version.

   Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetVerbose - verbosity level for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - level (default 1)

   Notes: print parameters, timings and matrix statistics

   Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - 0 or 1

   Notes: If A has a
          symmetric nonzero pattern use -sp 1 to
          improve performance by eliminating some communication
          in the parallel version. Even if A does not have a
          symmetric nonzero pattern -sp 1 may well lead to good
          results, but the code will not follow the published
          SPAI algorithm exactly.

   Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/**********************************************************************/

/*
   PCSetFromOptions_SPAI - Processes the -pc_spai_* options-database
   entries; each option that was actually set is forwarded to the
   corresponding PCSPAISetXXX() wrapper.
*/
static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
  double         epsilon1;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr);
  ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
  }
  /* added 1/7/99 g.h. */
  ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*MC
   PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
     as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)

   Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - verbose output

   Notes: This only works with AIJ matrices.

   Level: beginner

   Concepts: approximate inverse

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
    PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
    PCSPAISetVerbose(), PCSPAISetSp()
M*/

/*
   PCCreate_SPAI - Allocates the PC_SPAI context, installs the PC
   callbacks, sets the default SPAI parameters, and composes the
   PCSPAISetXXX_C backend methods used by the PCSPAISetXXX() wrappers.
*/
PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
{
  PC_SPAI        *ispai;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr     = PetscNewLog(pc,&ispai);CHKERRQ(ierr);
  pc->data = ispai;

  pc->ops->destroy         = PCDestroy_SPAI;
  pc->ops->apply           = PCApply_SPAI;
  pc->ops->applyrichardson = 0;
  pc->ops->setup           = PCSetUp_SPAI;
  pc->ops->view            = PCView_SPAI;
  pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

  /* default SPAI parameters; overridable via options or PCSPAISetXXX() */
  ispai->epsilon    = .4;
  ispai->nbsteps    = 5;
  ispai->max        = 5000;
  ispai->maxnew     = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose    = 0;

  ispai->sp = 1;
  /* private duplicate of the PC's communicator for SPAI's internal
     traffic; freed in PCDestroy_SPAI */
  ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/ 595 596 /* 597 Converts from a PETSc matrix to an SPAI matrix 598 */ 599 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 600 { 601 matrix *M; 602 int i,j,col; 603 int row_indx; 604 int len,pe,local_indx,start_indx; 605 int *mapping; 606 PetscErrorCode ierr; 607 const int *cols; 608 const double *vals; 609 int n,mnl,nnl,nz,rstart,rend; 610 PetscMPIInt size,rank; 611 struct compressed_lines *rows; 612 613 PetscFunctionBegin; 614 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 615 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 616 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 617 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 618 619 /* 620 not sure why a barrier is required. commenting out 621 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 622 */ 623 624 M = new_matrix((SPAI_Comm)comm); 625 626 M->n = n; 627 M->bs = 1; 628 M->max_block_size = 1; 629 630 M->mnls = (int*)malloc(sizeof(int)*size); 631 M->start_indices = (int*)malloc(sizeof(int)*size); 632 M->pe = (int*)malloc(sizeof(int)*n); 633 M->block_sizes = (int*)malloc(sizeof(int)*n); 634 for (i=0; i<n; i++) M->block_sizes[i] = 1; 635 636 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 637 638 M->start_indices[0] = 0; 639 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 640 641 M->mnl = M->mnls[M->myid]; 642 M->my_start_index = M->start_indices[M->myid]; 643 644 for (i=0; i<size; i++) { 645 start_indx = M->start_indices[i]; 646 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 647 } 648 649 if (AT) { 650 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 651 } else { 652 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 653 } 654 655 rows = M->lines; 656 657 /* Determine the mapping from global indices to pointers */ 658 ierr = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr); 659 pe = 0; 660 local_indx = 0; 661 for (i=0; i<M->n; i++) { 
662 if (local_indx >= M->mnls[pe]) { 663 pe++; 664 local_indx = 0; 665 } 666 mapping[i] = local_indx + M->start_indices[pe]; 667 local_indx++; 668 } 669 670 /*********************************************************/ 671 /************** Set up the row structure *****************/ 672 /*********************************************************/ 673 674 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 675 for (i=rstart; i<rend; i++) { 676 row_indx = i - rstart; 677 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 678 /* allocate buffers */ 679 rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 680 rows->A[row_indx] = (double*)malloc(nz*sizeof(double)); 681 /* copy the matrix */ 682 for (j=0; j<nz; j++) { 683 col = cols[j]; 684 len = rows->len[row_indx]++; 685 686 rows->ptrs[row_indx][len] = mapping[col]; 687 rows->A[row_indx][len] = vals[j]; 688 } 689 rows->slen[row_indx] = rows->len[row_indx]; 690 691 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 692 } 693 694 695 /************************************************************/ 696 /************** Set up the column structure *****************/ 697 /*********************************************************/ 698 699 if (AT) { 700 701 for (i=rstart; i<rend; i++) { 702 row_indx = i - rstart; 703 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 704 /* allocate buffers */ 705 rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 706 /* copy the matrix (i.e., the structure) */ 707 for (j=0; j<nz; j++) { 708 col = cols[j]; 709 len = rows->rlen[row_indx]++; 710 711 rows->rptrs[row_indx][len] = mapping[col]; 712 } 713 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 714 } 715 } 716 717 ierr = PetscFree(mapping);CHKERRQ(ierr); 718 719 order_pointers(M); 720 M->maxnz = calc_maxnz(M); 721 *B = M; 722 PetscFunctionReturn(0); 723 } 724 725 /**********************************************************************/ 726 727 /* 728 Converts from an SPAI matrix B to a PETSc matrix PB. 
   This assumes that the SPAI matrix B is stored in
   COMPRESSED-ROW format.
*/
PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
{
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;
  int            m,n,M,N;
  int            d_nz,o_nz;
  int            *d_nnz,*o_nnz;
  int            i,k,global_row,global_col,first_diag_col,last_diag_col;
  PetscScalar    val;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  m    = n = B->mnls[rank];   /* local rows = local cols on this rank */
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateMPIAIJ */
  ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  /* count per-row entries inside [first_diag_col,last_diag_col) (diagonal
     block) versus outside it (off-diagonal block) */
  for (i=0; i<B->mnls[rank]; i++) {
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  ierr = MatCreate(comm,PB);CHKERRQ(ierr);
  ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
  /* per-row d_nnz/o_nnz arrays take precedence over the scalar d_nz/o_nz */
  ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

  /* insert the locally owned rows, one value at a time */
  for (i=0; i<B->mnls[rank]; i++) {
    global_row = B->start_indices[rank]+i;
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val  = B->lines->A[i][k];
      ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
    }
  }

  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = PetscFree(o_nnz);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   Converts from an SPAI vector v to a PETSc vec Pv.
*/
PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  int            m,M,i,*mnls,*start_indices,*global_indices;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  m = v->mnl;   /* local length */
  M = v->n;     /* global length */

  ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);

  /* gather every rank's local length to compute the ownership offsets */
  ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr);
  ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);

  ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr);

  start_indices[0] = 0;
  for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];

  /* global indices of this rank's contiguous local block */
  ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr);
  for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;

  ierr = PetscFree(mnls);CHKERRQ(ierr);
  ierr = PetscFree(start_indices);CHKERRQ(ierr);

  ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);

  ierr = PetscFree(global_indices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


