/*
   3/99 Modified by Stephen Barnard to support SPAI version 3.0
*/

/*
  Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
  Code written by Stephen Barnard.

  Note: there is some BAD memory bleeding below!

  This code needs work

     1) get rid of all memory bleeding
     2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
        rather than having the sp flag for PC_SPAI
     3) fix to set the block size based on the matrix block size

*/
#define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */

#include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
#include <../src/ksp/pc/impls/spai/petscspai.h>

/*
    These are the SPAI include files
*/
EXTERN_C_BEGIN
#define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
#include <spai.h>
#include <matrix.h>
EXTERN_C_END

extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
extern PetscErrorCode MM_to_PETSC(char *, char *, char *);

/*
   Private context for the SPAI preconditioner.  Holds the SPAI-format
   copies of the matrix, the SPAI tuning parameters, and the resulting
   approximate inverse converted back to a PETSc Mat.
*/
typedef struct {
  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM; /* the approximate inverse in PETSc format; applied by PCApply_SPAI */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;

/**********************************************************************/

/*
   PCSetUp_SPAI - build the approximate inverse.

   Converts pc->pmat into SPAI format (using the transpose to supply the
   column structure when the nonzero pattern is not symmetric), runs the
   SPAI driver bspai() to compute the approximate inverse M, and converts
   M back to the PETSc matrix ispai->PM used at apply time.
*/
static PetscErrorCode PCSetUp_SPAI(PC pc) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;
  Mat AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix serves as its own transpose */
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
  } else {
    /* Use the transpose to get the column nonzero structure. */
    PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
    PetscCall(MatDestroy(&AT));
  }

  /* Destroy the transpose */
  /* Don't know how to do it. PETSc developers? */

  /* construct SPAI preconditioner; the bspai() arguments are: */
  /* FILE *messages */ /* file for warning messages */
  /* double epsilon */ /* tolerance */
  /* int nbsteps */ /* max number of "improvement" steps per line */
  /* int max */ /* max dimensions of is_I, q, etc. */
  /* int maxnew */ /* max number of new entries per step */
  /* int block_size */ /* block_size == 1 specifies scalar elements
                          block_size == n specifies nxn constant-block elements
                          block_size == 0 specifies variable-block elements */
  /* int cache_size */ /* one of (1,2,3,4,5,6) indicating size of cache.
                          cache_size == 0 indicates no caching */
  /* int verbose */ /* verbose == 0 specifies that SPAI is silent
                       verbose == 1 prints timing and matrix statistics */

  PetscCall(bspai(ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose));

  PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* PCApply_SPAI - apply the preconditioner: y = PM * xx (one matrix-vector product). */
static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMult(ispai->PM, xx, y));
  PetscFunctionReturn(0);
}

/* PCMatApply_SPAI - apply the preconditioner to a block of vectors: Y = PM * X. */
static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   PCDestroy_SPAI - release the approximate inverse, the duplicated
   communicator, and the context, then unregister the PCSPAISetXXX_C
   composed methods.
*/
static PetscErrorCode PCDestroy_SPAI(PC pc) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ispai->PM));
  PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
  PetscCall(PetscFree(pc->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* PCView_SPAI - print the current SPAI parameter settings (ASCII viewers only). */
static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;
  PetscBool iascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon));
    PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps));
    PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max));
    PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew));
    PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size));
    PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size));
    PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose));
    PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp));
  }
  PetscFunctionReturn(0);
}

/* PCSPAISetEpsilon_SPAI - store the tolerance in the context (narrowed to the SPAI library's double). */
static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->epsilon = (double)epsilon1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* PCSPAISetNBSteps_SPAI - store the improvement-step limit (narrowed to the SPAI library's int). */
static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->nbsteps = (int)nbsteps1;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h.
*/ 184 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) { 185 PC_SPAI *ispai = (PC_SPAI *)pc->data; 186 187 PetscFunctionBegin; 188 ispai->max = (int)max1; 189 PetscFunctionReturn(0); 190 } 191 192 /**********************************************************************/ 193 194 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) { 195 PC_SPAI *ispai = (PC_SPAI *)pc->data; 196 197 PetscFunctionBegin; 198 ispai->maxnew = (int)maxnew1; 199 PetscFunctionReturn(0); 200 } 201 202 /**********************************************************************/ 203 204 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) { 205 PC_SPAI *ispai = (PC_SPAI *)pc->data; 206 207 PetscFunctionBegin; 208 ispai->block_size = (int)block_size1; 209 PetscFunctionReturn(0); 210 } 211 212 /**********************************************************************/ 213 214 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) { 215 PC_SPAI *ispai = (PC_SPAI *)pc->data; 216 217 PetscFunctionBegin; 218 ispai->cache_size = (int)cache_size; 219 PetscFunctionReturn(0); 220 } 221 222 /**********************************************************************/ 223 224 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) { 225 PC_SPAI *ispai = (PC_SPAI *)pc->data; 226 227 PetscFunctionBegin; 228 ispai->verbose = (int)verbose; 229 PetscFunctionReturn(0); 230 } 231 232 /**********************************************************************/ 233 234 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) { 235 PC_SPAI *ispai = (PC_SPAI *)pc->data; 236 237 PetscFunctionBegin; 238 ispai->sp = (int)sp; 239 PetscFunctionReturn(0); 240 } 241 242 /* -------------------------------------------------------------------*/ 243 244 /*@ 245 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 246 247 Input Parameters: 248 + pc - the preconditioner 249 - eps - epsilon (default .4) 250 251 Notes: 252 Espilon must be between 0 
and 1. It controls the
   quality of the approximation of M to the inverse of
   A. Higher values of epsilon lead to more work, more
   fill, and usually better preconditioners. In many
   cases the best choice of epsilon is the one that
   divides the total solution time equally between the
   preconditioner and the solver.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetNBSteps - set maximum number of improvement steps per row in
   the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - number of steps (default 5)

   Notes:
   SPAI constructs an approximation to every column of
   the exact inverse of A in a series of improvement
   steps. The quality of the approximation is determined
   by epsilon. If an approximation achieving an accuracy
   of epsilon is not obtained after ns steps, SPAI simply
   uses the best approximation constructed so far.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/* added 1/7/99 g.h.
*/
/*@
   PCSPAISetMax - set the size of various working buffers in
   the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - size (default is 5000)

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - maximum number (default 5)

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetBlockSize - set the block size for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - block size (default 1)

   Notes:
   A block
   size of 1 treats A as a matrix of scalar elements. A
   block size of s > 1 treats A as a matrix of sxs
   blocks. A block size of 0 treats A as a matrix with
   variable sized blocks, which are determined by
   searching for dense square diagonal blocks in A.
   This can be very effective for finite-element
   matrices.

   SPAI will convert A to block form, use a block
   version of the preconditioner algorithm, and then
   convert the result back to scalar form.

   In many cases a block-size parameter other than 1
   can lead to very significant improvement in
   performance.
   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - cache size {0,1,2,3,4,5} (default 5)

   Notes:
   SPAI uses a hash table to cache messages and avoid
   redundant communication. I suggest always using
   5. This parameter is irrelevant in the serial
   version.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetVerbose - verbosity level for the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - level (default 1)

   Notes:
   print parameters, timings and matrix statistics

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*@
   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - 0 or 1

   Notes:
   If A has a symmetric nonzero pattern use -sp 1 to
   improve performance by eliminating some communication
in the parallel version. Even if A does not have a 436 symmetric nonzero pattern -sp 1 may well lead to good 437 results, but the code will not follow the published 438 SPAI algorithm exactly. 439 440 Level: intermediate 441 442 .seealso: `PCSPAI`, `PCSetType()` 443 @*/ 444 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) { 445 PetscFunctionBegin; 446 PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp)); 447 PetscFunctionReturn(0); 448 } 449 450 /**********************************************************************/ 451 452 /**********************************************************************/ 453 454 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject) { 455 PC_SPAI *ispai = (PC_SPAI *)pc->data; 456 int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp; 457 double epsilon1; 458 PetscBool flg; 459 460 PetscFunctionBegin; 461 PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options"); 462 PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg)); 463 if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1)); 464 PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg)); 465 if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1)); 466 /* added 1/7/99 g.h. 
*/ 467 PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg)); 468 if (flg) PetscCall(PCSPAISetMax(pc, max1)); 469 PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg)); 470 if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1)); 471 PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg)); 472 if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1)); 473 PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg)); 474 if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size)); 475 PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg)); 476 if (flg) PetscCall(PCSPAISetVerbose(pc, verbose)); 477 PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg)); 478 if (flg) PetscCall(PCSPAISetSp(pc, sp)); 479 PetscOptionsHeadEnd(); 480 PetscFunctionReturn(0); 481 } 482 483 /**********************************************************************/ 484 485 /*MC 486 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 487 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 488 489 Options Database Keys: 490 + -pc_spai_epsilon <eps> - set tolerance 491 . -pc_spai_nbstep <n> - set nbsteps 492 . -pc_spai_max <m> - set max 493 . -pc_spai_max_new <m> - set maxnew 494 . -pc_spai_block_size <n> - set block size 495 . -pc_spai_cache_size <n> - set cache size 496 . -pc_spai_sp <m> - set sp 497 - -pc_spai_set_verbose <true,false> - verbose output 498 499 Notes: 500 This only works with AIJ matrices. 
   Level: beginner

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
   `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
   `PCSPAISetVerbose()`, `PCSPAISetSp()`
M*/

/*
   PCCreate_SPAI - allocate the PC_SPAI context, install the PC operations,
   set the default parameters, duplicate the communicator for the SPAI
   library, and register the PCSPAISetXXX_C composed methods.
*/
PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) {
  PC_SPAI *ispai;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(pc, &ispai));
  pc->data = ispai;

  pc->ops->destroy = PCDestroy_SPAI;
  pc->ops->apply = PCApply_SPAI;
  pc->ops->matapply = PCMatApply_SPAI;
  pc->ops->applyrichardson = 0;
  pc->ops->setup = PCSetUp_SPAI;
  pc->ops->view = PCView_SPAI;
  pc->ops->setfromoptions = PCSetFromOptions_SPAI;

  /* defaults match the option documentation above */
  ispai->epsilon = .4;
  ispai->nbsteps = 5;
  ispai->max = 5000;
  ispai->maxnew = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose = 0;

  ispai->sp = 1;
  /* SPAI keeps the communicator; use a private dup so the library cannot
     interfere with other traffic on the PC's communicator */
  PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   ConvertMatToMatrix - converts a PETSc matrix A to the SPAI "matrix"
   format in *B.  If AT (the transpose of A, or A itself when the nonzero
   pattern is symmetric) is non-NULL it is used to build the column
   (transpose) structure as well.

   NOTE(review): this routine assumes PetscInt == int (n, nz, cols, rstart,
   rend are declared int but passed to MatGetSize/MatGetRow etc.) and
   PetscScalar == double; it is not correct for 64-bit-index or complex
   builds -- TODO confirm against the build configurations this is
   compiled for.

   Ownership: the malloc'ed arrays hung off M are owned by the SPAI matrix;
   presumably sp_free_matrix() in PCSetUp_SPAI releases them -- verify
   against the SPAI library (see the "memory bleeding" note at the top of
   this file).
*/
PetscErrorCode
ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B) {
  matrix *M;
  int i, j, col;
  int row_indx;
  int len, pe, local_indx, start_indx;
  int *mapping;
  const int *cols;
  const double *vals;
  int n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required. commenting out
    PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  M->n = n;
  M->bs = 1;
  M->max_block_size = 1;

  /* per-process local sizes, start indices, and owner of each global row;
     scalar (1x1) blocks throughout */
  M->mnls = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe = (int *)malloc(sizeof(int) * n);
  M->block_sizes = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  /* second argument 1 requests storage for the transpose (column) structure */
  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /*********************************************************/
  /************** Set up the row structure *****************/
  /*********************************************************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart;
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx] = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len] = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************************************************************/
  /************** Set up the column structure *****************/
  /*********************************************************/

  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B = M;
  PetscFunctionReturn(0);
}

/**********************************************************************/

/*
   Converts from an SPAI matrix B to a PETSc matrix PB.
   This assumes that the SPAI matrix B is stored in
   COMPRESSED-ROW format.
679 */ 680 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB) { 681 PetscMPIInt size, rank; 682 int m, n, M, N; 683 int d_nz, o_nz; 684 int *d_nnz, *o_nnz; 685 int i, k, global_row, global_col, first_diag_col, last_diag_col; 686 PetscScalar val; 687 688 PetscFunctionBegin; 689 PetscCallMPI(MPI_Comm_size(comm, &size)); 690 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 691 692 m = n = B->mnls[rank]; 693 d_nz = o_nz = 0; 694 695 /* Determine preallocation for MatCreateAIJ */ 696 PetscCall(PetscMalloc1(m, &d_nnz)); 697 PetscCall(PetscMalloc1(m, &o_nnz)); 698 for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0; 699 first_diag_col = B->start_indices[rank]; 700 last_diag_col = first_diag_col + B->mnls[rank]; 701 for (i = 0; i < B->mnls[rank]; i++) { 702 for (k = 0; k < B->lines->len[i]; k++) { 703 global_col = B->lines->ptrs[i][k]; 704 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 705 else o_nnz[i]++; 706 } 707 } 708 709 M = N = B->n; 710 /* Here we only know how to create AIJ format */ 711 PetscCall(MatCreate(comm, PB)); 712 PetscCall(MatSetSizes(*PB, m, n, M, N)); 713 PetscCall(MatSetType(*PB, MATAIJ)); 714 PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz)); 715 PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz)); 716 717 for (i = 0; i < B->mnls[rank]; i++) { 718 global_row = B->start_indices[rank] + i; 719 for (k = 0; k < B->lines->len[i]; k++) { 720 global_col = B->lines->ptrs[i][k]; 721 722 val = B->lines->A[i][k]; 723 PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES)); 724 } 725 } 726 727 PetscCall(PetscFree(d_nnz)); 728 PetscCall(PetscFree(o_nnz)); 729 730 PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY)); 731 PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY)); 732 PetscFunctionReturn(0); 733 } 734 735 /**********************************************************************/ 736 737 /* 738 Converts from an SPAI vector v to a PETSc vec Pv. 
739 */ 740 PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv) { 741 PetscMPIInt size, rank; 742 int m, M, i, *mnls, *start_indices, *global_indices; 743 744 PetscFunctionBegin; 745 PetscCallMPI(MPI_Comm_size(comm, &size)); 746 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 747 748 m = v->mnl; 749 M = v->n; 750 751 PetscCall(VecCreateMPI(comm, m, M, Pv)); 752 753 PetscCall(PetscMalloc1(size, &mnls)); 754 PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm)); 755 756 PetscCall(PetscMalloc1(size, &start_indices)); 757 758 start_indices[0] = 0; 759 for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1]; 760 761 PetscCall(PetscMalloc1(v->mnl, &global_indices)); 762 for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i; 763 764 PetscCall(PetscFree(mnls)); 765 PetscCall(PetscFree(start_indices)); 766 767 PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES)); 768 PetscCall(VecAssemblyBegin(*Pv)); 769 PetscCall(VecAssemblyEnd(*Pv)); 770 771 PetscCall(PetscFree(global_indices)); 772 PetscFunctionReturn(0); 773 } 774