1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 23 #include <../src/ksp/pc/impls/spai/petscspai.h> 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include <spai.h> 31 #include <matrix.h> 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 38 39 typedef struct { 40 41 matrix *B; /* matrix in SPAI format */ 42 matrix *BT; /* transpose of matrix in SPAI format */ 43 matrix *M; /* the approximate inverse in SPAI format */ 44 45 Mat PM; /* the approximate inverse PETSc format */ 46 47 double epsilon; /* tolerance */ 48 int nbsteps; /* max number of "improvement" steps per line */ 49 int max; /* max dimensions of is_I, q, etc. */ 50 int maxnew; /* max number of new entries per step */ 51 int block_size; /* constant block size */ 52 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 53 int verbose; /* SPAI prints timing and statistics */ 54 55 int sp; /* symmetric nonzero pattern */ 56 MPI_Comm comm_spai; /* communicator to be used with spai */ 57 } PC_SPAI; 58 59 /**********************************************************************/ 60 61 static PetscErrorCode PCSetUp_SPAI(PC pc) 62 { 63 PC_SPAI *ispai = (PC_SPAI*)pc->data; 64 Mat AT; 65 66 PetscFunctionBegin; 67 init_SPAI(); 68 69 if (ispai->sp) { 70 PetscCall(ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B)); 71 } else { 72 /* Use the transpose to get the column nonzero structure. */ 73 PetscCall(MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT)); 74 PetscCall(ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B)); 75 PetscCall(MatDestroy(&AT)); 76 } 77 78 /* Destroy the transpose */ 79 /* Don't know how to do it. PETSc developers? */ 80 81 /* construct SPAI preconditioner */ 82 /* FILE *messages */ /* file for warning messages */ 83 /* double epsilon */ /* tolerance */ 84 /* int nbsteps */ /* max number of "improvement" steps per line */ 85 /* int max */ /* max dimensions of is_I, q, etc. */ 86 /* int maxnew */ /* max number of new entries per step */ 87 /* int block_size */ /* block_size == 1 specifies scalar elments 88 block_size == n specifies nxn constant-block elements 89 block_size == 0 specifies variable-block elements */ 90 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 91 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 92 verbose == 1 prints timing and matrix statistics */ 93 94 PetscCall(bspai(ispai->B,&ispai->M, 95 stdout, 96 ispai->epsilon, 97 ispai->nbsteps, 98 ispai->max, 99 ispai->maxnew, 100 ispai->block_size, 101 ispai->cache_size, 102 ispai->verbose)); 103 104 PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM)); 105 106 /* free the SPAI matrices */ 107 sp_free_matrix(ispai->B); 108 sp_free_matrix(ispai->M); 109 PetscFunctionReturn(0); 110 } 111 112 /**********************************************************************/ 113 114 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 115 { 116 PC_SPAI *ispai = (PC_SPAI*)pc->data; 117 118 PetscFunctionBegin; 119 /* Now using PETSc's multiply */ 120 PetscCall(MatMult(ispai->PM,xx,y)); 121 PetscFunctionReturn(0); 122 } 123 124 static PetscErrorCode PCMatApply_SPAI(PC pc,Mat X,Mat Y) 125 { 126 PC_SPAI *ispai = (PC_SPAI*)pc->data; 127 128 PetscFunctionBegin; 129 /* Now using PETSc's multiply */ 130 PetscCall(MatMatMult(ispai->PM,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 131 PetscFunctionReturn(0); 132 } 133 134 /**********************************************************************/ 135 136 static PetscErrorCode PCDestroy_SPAI(PC pc) 137 { 138 PC_SPAI *ispai = (PC_SPAI*)pc->data; 139 140 PetscFunctionBegin; 141 PetscCall(MatDestroy(&ispai->PM)); 142 PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai))); 143 PetscCall(PetscFree(pc->data)); 144 PetscFunctionReturn(0); 145 } 146 147 /**********************************************************************/ 148 149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 150 { 151 PC_SPAI *ispai = (PC_SPAI*)pc->data; 152 PetscBool iascii; 153 154 PetscFunctionBegin; 155 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 156 if (iascii) { 157 PetscCall(PetscViewerASCIIPrintf(viewer," epsilon %g\n", (double)ispai->epsilon)); 158 PetscCall(PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps)); 159 PetscCall(PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max)); 160 PetscCall(PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew)); 161 PetscCall(PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size)); 162 PetscCall(PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size)); 163 PetscCall(PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose)); 164 PetscCall(PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp)); 165 } 166 PetscFunctionReturn(0); 167 } 168 169 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 170 { 171 PC_SPAI *ispai = (PC_SPAI*)pc->data; 172 173 PetscFunctionBegin; 174 ispai->epsilon = epsilon1; 175 PetscFunctionReturn(0); 176 } 177 178 /**********************************************************************/ 179 180 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 181 { 182 PC_SPAI *ispai = (PC_SPAI*)pc->data; 183 184 PetscFunctionBegin; 185 ispai->nbsteps = nbsteps1; 186 PetscFunctionReturn(0); 187 } 188 189 /**********************************************************************/ 190 191 /* added 1/7/99 g.h. */ 192 static PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 193 { 194 PC_SPAI *ispai = (PC_SPAI*)pc->data; 195 196 PetscFunctionBegin; 197 ispai->max = max1; 198 PetscFunctionReturn(0); 199 } 200 201 /**********************************************************************/ 202 203 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 204 { 205 PC_SPAI *ispai = (PC_SPAI*)pc->data; 206 207 PetscFunctionBegin; 208 ispai->maxnew = maxnew1; 209 PetscFunctionReturn(0); 210 } 211 212 /**********************************************************************/ 213 214 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 215 { 216 PC_SPAI *ispai = (PC_SPAI*)pc->data; 217 218 PetscFunctionBegin; 219 ispai->block_size = block_size1; 220 PetscFunctionReturn(0); 221 } 222 223 /**********************************************************************/ 224 225 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 226 { 227 PC_SPAI *ispai = (PC_SPAI*)pc->data; 228 229 PetscFunctionBegin; 230 ispai->cache_size = cache_size; 231 PetscFunctionReturn(0); 232 } 233 234 /**********************************************************************/ 235 236 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 237 { 238 PC_SPAI *ispai = (PC_SPAI*)pc->data; 239 240 PetscFunctionBegin; 241 ispai->verbose = verbose; 242 PetscFunctionReturn(0); 243 } 244 245 /**********************************************************************/ 246 247 static PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 248 { 249 PC_SPAI *ispai = (PC_SPAI*)pc->data; 250 251 PetscFunctionBegin; 252 ispai->sp = sp; 253 PetscFunctionReturn(0); 254 } 255 256 /* -------------------------------------------------------------------*/ 257 258 /*@ 259 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 260 261 Input Parameters: 262 + pc - the preconditioner 263 - eps - epsilon (default .4) 264 265 Notes: 266 Espilon must be between 0 and 1. It controls the 267 quality of the approximation of M to the inverse of 268 A. Higher values of epsilon lead to more work, more 269 fill, and usually better preconditioners. In many 270 cases the best choice of epsilon is the one that 271 divides the total solution time equally between the 272 preconditioner and the solver. 273 274 Level: intermediate 275 276 .seealso: PCSPAI, PCSetType() 277 @*/ 278 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 279 { 280 PetscFunctionBegin; 281 PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1)); 282 PetscFunctionReturn(0); 283 } 284 285 /**********************************************************************/ 286 287 /*@ 288 PCSPAISetNBSteps - set maximum number of improvement steps per row in 289 the SPAI preconditioner 290 291 Input Parameters: 292 + pc - the preconditioner 293 - n - number of steps (default 5) 294 295 Notes: 296 SPAI constructs to approximation to every column of 297 the exact inverse of A in a series of improvement 298 steps. The quality of the approximation is determined 299 by epsilon. If an approximation achieving an accuracy 300 of epsilon is not obtained after ns steps, SPAI simply 301 uses the best approximation constructed so far. 302 303 Level: intermediate 304 305 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 306 @*/ 307 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 308 { 309 PetscFunctionBegin; 310 PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1)); 311 PetscFunctionReturn(0); 312 } 313 314 /**********************************************************************/ 315 316 /* added 1/7/99 g.h. */ 317 /*@ 318 PCSPAISetMax - set the size of various working buffers in 319 the SPAI preconditioner 320 321 Input Parameters: 322 + pc - the preconditioner 323 - n - size (default is 5000) 324 325 Level: intermediate 326 327 .seealso: PCSPAI, PCSetType() 328 @*/ 329 PetscErrorCode PCSPAISetMax(PC pc,int max1) 330 { 331 PetscFunctionBegin; 332 PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1)); 333 PetscFunctionReturn(0); 334 } 335 336 /**********************************************************************/ 337 338 /*@ 339 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 340 in SPAI preconditioner 341 342 Input Parameters: 343 + pc - the preconditioner 344 - n - maximum number (default 5) 345 346 Level: intermediate 347 348 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 349 @*/ 350 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 351 { 352 PetscFunctionBegin; 353 PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1)); 354 PetscFunctionReturn(0); 355 } 356 357 /**********************************************************************/ 358 359 /*@ 360 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 361 362 Input Parameters: 363 + pc - the preconditioner 364 - n - block size (default 1) 365 366 Notes: 367 A block 368 size of 1 treats A as a matrix of scalar elements. A 369 block size of s > 1 treats A as a matrix of sxs 370 blocks. A block size of 0 treats A as a matrix with 371 variable sized blocks, which are determined by 372 searching for dense square diagonal blocks in A. 373 This can be very effective for finite-element 374 matrices. 375 376 SPAI will convert A to block form, use a block 377 version of the preconditioner algorithm, and then 378 convert the result back to scalar form. 379 380 In many cases the a block-size parameter other than 1 381 can lead to very significant improvement in 382 performance. 383 384 Level: intermediate 385 386 .seealso: PCSPAI, PCSetType() 387 @*/ 388 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 389 { 390 PetscFunctionBegin; 391 PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1)); 392 PetscFunctionReturn(0); 393 } 394 395 /**********************************************************************/ 396 397 /*@ 398 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 399 400 Input Parameters: 401 + pc - the preconditioner 402 - n - cache size {0,1,2,3,4,5} (default 5) 403 404 Notes: 405 SPAI uses a hash table to cache messages and avoid 406 redundant communication. If suggest always using 407 5. This parameter is irrelevant in the serial 408 version. 409 410 Level: intermediate 411 412 .seealso: PCSPAI, PCSetType() 413 @*/ 414 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 415 { 416 PetscFunctionBegin; 417 PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size)); 418 PetscFunctionReturn(0); 419 } 420 421 /**********************************************************************/ 422 423 /*@ 424 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 425 426 Input Parameters: 427 + pc - the preconditioner 428 - n - level (default 1) 429 430 Notes: 431 print parameters, timings and matrix statistics 432 433 Level: intermediate 434 435 .seealso: PCSPAI, PCSetType() 436 @*/ 437 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 438 { 439 PetscFunctionBegin; 440 PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose)); 441 PetscFunctionReturn(0); 442 } 443 444 /**********************************************************************/ 445 446 /*@ 447 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 448 449 Input Parameters: 450 + pc - the preconditioner 451 - n - 0 or 1 452 453 Notes: 454 If A has a symmetric nonzero pattern use -sp 1 to 455 improve performance by eliminating some communication 456 in the parallel version. Even if A does not have a 457 symmetric nonzero pattern -sp 1 may well lead to good 458 results, but the code will not follow the published 459 SPAI algorithm exactly. 460 461 Level: intermediate 462 463 .seealso: PCSPAI, PCSetType() 464 @*/ 465 PetscErrorCode PCSPAISetSp(PC pc,int sp) 466 { 467 PetscFunctionBegin; 468 PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp)); 469 PetscFunctionReturn(0); 470 } 471 472 /**********************************************************************/ 473 474 /**********************************************************************/ 475 476 static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc) 477 { 478 PC_SPAI *ispai = (PC_SPAI*)pc->data; 479 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 480 double epsilon1; 481 PetscBool flg; 482 483 PetscFunctionBegin; 484 PetscCall(PetscOptionsHead(PetscOptionsObject,"SPAI options")); 485 PetscCall(PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg)); 486 if (flg) { 487 PetscCall(PCSPAISetEpsilon(pc,epsilon1)); 488 } 489 PetscCall(PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg)); 490 if (flg) { 491 PetscCall(PCSPAISetNBSteps(pc,nbsteps1)); 492 } 493 /* added 1/7/99 g.h. */ 494 PetscCall(PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg)); 495 if (flg) { 496 PetscCall(PCSPAISetMax(pc,max1)); 497 } 498 PetscCall(PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg)); 499 if (flg) { 500 PetscCall(PCSPAISetMaxNew(pc,maxnew1)); 501 } 502 PetscCall(PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg)); 503 if (flg) { 504 PetscCall(PCSPAISetBlockSize(pc,block_size1)); 505 } 506 PetscCall(PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg)); 507 if (flg) { 508 PetscCall(PCSPAISetCacheSize(pc,cache_size)); 509 } 510 PetscCall(PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg)); 511 if (flg) { 512 PetscCall(PCSPAISetVerbose(pc,verbose)); 513 } 514 PetscCall(PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg)); 515 if (flg) { 516 PetscCall(PCSPAISetSp(pc,sp)); 517 } 518 PetscCall(PetscOptionsTail()); 519 PetscFunctionReturn(0); 520 } 521 522 /**********************************************************************/ 523 524 /*MC 525 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 526 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 527 528 Options Database Keys: 529 + -pc_spai_epsilon <eps> - set tolerance 530 . -pc_spai_nbstep <n> - set nbsteps 531 . -pc_spai_max <m> - set max 532 . -pc_spai_max_new <m> - set maxnew 533 . -pc_spai_block_size <n> - set block size 534 . -pc_spai_cache_size <n> - set cache size 535 . -pc_spai_sp <m> - set sp 536 - -pc_spai_set_verbose <true,false> - verbose output 537 538 Notes: 539 This only works with AIJ matrices. 540 541 Level: beginner 542 543 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 544 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 545 PCSPAISetVerbose(), PCSPAISetSp() 546 M*/ 547 548 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 549 { 550 PC_SPAI *ispai; 551 552 PetscFunctionBegin; 553 PetscCall(PetscNewLog(pc,&ispai)); 554 pc->data = ispai; 555 556 pc->ops->destroy = PCDestroy_SPAI; 557 pc->ops->apply = PCApply_SPAI; 558 pc->ops->matapply = PCMatApply_SPAI; 559 pc->ops->applyrichardson = 0; 560 pc->ops->setup = PCSetUp_SPAI; 561 pc->ops->view = PCView_SPAI; 562 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 563 564 ispai->epsilon = .4; 565 ispai->nbsteps = 5; 566 ispai->max = 5000; 567 ispai->maxnew = 5; 568 ispai->block_size = 1; 569 ispai->cache_size = 5; 570 ispai->verbose = 0; 571 572 ispai->sp = 1; 573 PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai))); 574 575 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI)); 576 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI)); 577 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI)); 578 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI)); 579 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI)); 580 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI)); 581 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI)); 582 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI)); 583 PetscFunctionReturn(0); 584 } 585 586 /**********************************************************************/ 587 588 /* 589 Converts from a PETSc matrix to an SPAI matrix 590 */ 591 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 592 { 593 matrix *M; 594 int i,j,col; 595 int row_indx; 596 int len,pe,local_indx,start_indx; 597 int *mapping; 598 const int *cols; 599 const double *vals; 600 int n,mnl,nnl,nz,rstart,rend; 601 PetscMPIInt size,rank; 602 struct compressed_lines *rows; 603 604 PetscFunctionBegin; 605 PetscCallMPI(MPI_Comm_size(comm,&size)); 606 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 607 PetscCall(MatGetSize(A,&n,&n)); 608 PetscCall(MatGetLocalSize(A,&mnl,&nnl)); 609 610 /* 611 not sure why a barrier is required. commenting out 612 PetscCallMPI(MPI_Barrier(comm)); 613 */ 614 615 M = new_matrix((SPAI_Comm)comm); 616 617 M->n = n; 618 M->bs = 1; 619 M->max_block_size = 1; 620 621 M->mnls = (int*)malloc(sizeof(int)*size); 622 M->start_indices = (int*)malloc(sizeof(int)*size); 623 M->pe = (int*)malloc(sizeof(int)*n); 624 M->block_sizes = (int*)malloc(sizeof(int)*n); 625 for (i=0; i<n; i++) M->block_sizes[i] = 1; 626 627 PetscCallMPI(MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm)); 628 629 M->start_indices[0] = 0; 630 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 631 632 M->mnl = M->mnls[M->myid]; 633 M->my_start_index = M->start_indices[M->myid]; 634 635 for (i=0; i<size; i++) { 636 start_indx = M->start_indices[i]; 637 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 638 } 639 640 if (AT) { 641 M->lines = new_compressed_lines(M->mnls[rank],1); 642 } else { 643 M->lines = new_compressed_lines(M->mnls[rank],0); 644 } 645 646 rows = M->lines; 647 648 /* Determine the mapping from global indices to pointers */ 649 PetscCall(PetscMalloc1(M->n,&mapping)); 650 pe = 0; 651 local_indx = 0; 652 for (i=0; i<M->n; i++) { 653 if (local_indx >= M->mnls[pe]) { 654 pe++; 655 local_indx = 0; 656 } 657 mapping[i] = local_indx + M->start_indices[pe]; 658 local_indx++; 659 } 660 661 /*********************************************************/ 662 /************** Set up the row structure *****************/ 663 /*********************************************************/ 664 665 PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 666 for (i=rstart; i<rend; i++) { 667 row_indx = i - rstart; 668 PetscCall(MatGetRow(A,i,&nz,&cols,&vals)); 669 /* allocate buffers */ 670 rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 671 rows->A[row_indx] = (double*)malloc(nz*sizeof(double)); 672 /* copy the matrix */ 673 for (j=0; j<nz; j++) { 674 col = cols[j]; 675 len = rows->len[row_indx]++; 676 677 rows->ptrs[row_indx][len] = mapping[col]; 678 rows->A[row_indx][len] = vals[j]; 679 } 680 rows->slen[row_indx] = rows->len[row_indx]; 681 682 PetscCall(MatRestoreRow(A,i,&nz,&cols,&vals)); 683 } 684 685 /************************************************************/ 686 /************** Set up the column structure *****************/ 687 /*********************************************************/ 688 689 if (AT) { 690 691 for (i=rstart; i<rend; i++) { 692 row_indx = i - rstart; 693 PetscCall(MatGetRow(AT,i,&nz,&cols,&vals)); 694 /* allocate buffers */ 695 rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 696 /* copy the matrix (i.e., the structure) */ 697 for (j=0; j<nz; j++) { 698 col = cols[j]; 699 len = rows->rlen[row_indx]++; 700 701 rows->rptrs[row_indx][len] = mapping[col]; 702 } 703 PetscCall(MatRestoreRow(AT,i,&nz,&cols,&vals)); 704 } 705 } 706 707 PetscCall(PetscFree(mapping)); 708 709 order_pointers(M); 710 M->maxnz = calc_maxnz(M); 711 *B = M; 712 PetscFunctionReturn(0); 713 } 714 715 /**********************************************************************/ 716 717 /* 718 Converts from an SPAI matrix B to a PETSc matrix PB. 719 This assumes that the SPAI matrix B is stored in 720 COMPRESSED-ROW format. 721 */ 722 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 723 { 724 PetscMPIInt size,rank; 725 int m,n,M,N; 726 int d_nz,o_nz; 727 int *d_nnz,*o_nnz; 728 int i,k,global_row,global_col,first_diag_col,last_diag_col; 729 PetscScalar val; 730 731 PetscFunctionBegin; 732 PetscCallMPI(MPI_Comm_size(comm,&size)); 733 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 734 735 m = n = B->mnls[rank]; 736 d_nz = o_nz = 0; 737 738 /* Determine preallocation for MatCreateAIJ */ 739 PetscCall(PetscMalloc1(m,&d_nnz)); 740 PetscCall(PetscMalloc1(m,&o_nnz)); 741 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 742 first_diag_col = B->start_indices[rank]; 743 last_diag_col = first_diag_col + B->mnls[rank]; 744 for (i=0; i<B->mnls[rank]; i++) { 745 for (k=0; k<B->lines->len[i]; k++) { 746 global_col = B->lines->ptrs[i][k]; 747 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 748 else o_nnz[i]++; 749 } 750 } 751 752 M = N = B->n; 753 /* Here we only know how to create AIJ format */ 754 PetscCall(MatCreate(comm,PB)); 755 PetscCall(MatSetSizes(*PB,m,n,M,N)); 756 PetscCall(MatSetType(*PB,MATAIJ)); 757 PetscCall(MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz)); 758 PetscCall(MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz)); 759 760 for (i=0; i<B->mnls[rank]; i++) { 761 global_row = B->start_indices[rank]+i; 762 for (k=0; k<B->lines->len[i]; k++) { 763 global_col = B->lines->ptrs[i][k]; 764 765 val = B->lines->A[i][k]; 766 PetscCall(MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES)); 767 } 768 } 769 770 PetscCall(PetscFree(d_nnz)); 771 PetscCall(PetscFree(o_nnz)); 772 773 PetscCall(MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY)); 774 PetscCall(MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY)); 775 PetscFunctionReturn(0); 776 } 777 778 /**********************************************************************/ 779 780 /* 781 Converts from an SPAI vector v to a PETSc vec Pv. 782 */ 783 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 784 { 785 PetscMPIInt size,rank; 786 int m,M,i,*mnls,*start_indices,*global_indices; 787 788 PetscFunctionBegin; 789 PetscCallMPI(MPI_Comm_size(comm,&size)); 790 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 791 792 m = v->mnl; 793 M = v->n; 794 795 PetscCall(VecCreateMPI(comm,m,M,Pv)); 796 797 PetscCall(PetscMalloc1(size,&mnls)); 798 PetscCallMPI(MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm)); 799 800 PetscCall(PetscMalloc1(size,&start_indices)); 801 802 start_indices[0] = 0; 803 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 804 805 PetscCall(PetscMalloc1(v->mnl,&global_indices)); 806 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 807 808 PetscCall(PetscFree(mnls)); 809 PetscCall(PetscFree(start_indices)); 810 811 PetscCall(VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES)); 812 PetscCall(VecAssemblyBegin(*Pv)); 813 PetscCall(VecAssemblyEnd(*Pv)); 814 815 PetscCall(PetscFree(global_indices)); 816 PetscFunctionReturn(0); 817 } 818