1 /* 2 3/99 Modified by Stephen Barnard to support SPAI version 3.0 3 */ 4 5 /* 6 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 7 Code written by Stephen Barnard. 8 9 Note: there is some BAD memory bleeding below! 10 11 This code needs work 12 13 1) get rid of all memory bleeding 14 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 15 rather than having the sp flag for PC_SPAI 16 3) fix to set the block size based on the matrix block size 17 18 */ 19 #if !defined(PETSC_SKIP_COMPLEX) 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 #endif 22 23 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 24 #include <../src/ksp/pc/impls/spai/petscspai.h> 25 26 /* 27 These are the SPAI include files 28 */ 29 EXTERN_C_BEGIN 30 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 31 #include <spai.h> 32 #include <matrix.h> 33 EXTERN_C_END 34 35 static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **); 36 static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *); 37 38 typedef struct { 39 matrix *B; /* matrix in SPAI format */ 40 matrix *BT; /* transpose of matrix in SPAI format */ 41 matrix *M; /* the approximate inverse in SPAI format */ 42 43 Mat PM; /* the approximate inverse PETSc format */ 44 45 double epsilon; /* tolerance */ 46 int nbsteps; /* max number of "improvement" steps per line */ 47 int max; /* max dimensions of is_I, q, etc. 
*/ 48 int maxnew; /* max number of new entries per step */ 49 int block_size; /* constant block size */ 50 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 51 int verbose; /* SPAI prints timing and statistics */ 52 53 int sp; /* symmetric nonzero pattern */ 54 MPI_Comm comm_spai; /* communicator to be used with spai */ 55 } PC_SPAI; 56 57 static PetscErrorCode PCSetUp_SPAI(PC pc) 58 { 59 PC_SPAI *ispai = (PC_SPAI *)pc->data; 60 Mat AT; 61 62 PetscFunctionBegin; 63 init_SPAI(); 64 65 if (ispai->sp) { 66 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B)); 67 } else { 68 /* Use the transpose to get the column nonzero structure. */ 69 PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT)); 70 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B)); 71 PetscCall(MatDestroy(&AT)); 72 } 73 74 /* Destroy the transpose */ 75 /* Don't know how to do it. PETSc developers? */ 76 77 /* construct SPAI preconditioner */ 78 /* FILE *messages */ /* file for warning messages */ 79 /* double epsilon */ /* tolerance */ 80 /* int nbsteps */ /* max number of "improvement" steps per line */ 81 /* int max */ /* max dimensions of is_I, q, etc. */ 82 /* int maxnew */ /* max number of new entries per step */ 83 /* int block_size */ /* block_size == 1 specifies scalar elements 84 block_size == n specifies nxn constant-block elements 85 block_size == 0 specifies variable-block elements */ 86 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. 
cache_size == 0 indicates no caching */ 87 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 88 verbose == 1 prints timing and matrix statistics */ 89 90 PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose); 91 92 PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM)); 93 94 /* free the SPAI matrices */ 95 sp_free_matrix(ispai->B); 96 sp_free_matrix(ispai->M); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) 101 { 102 PC_SPAI *ispai = (PC_SPAI *)pc->data; 103 104 PetscFunctionBegin; 105 /* Now using PETSc's multiply */ 106 PetscCall(MatMult(ispai->PM, xx, y)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) 111 { 112 PC_SPAI *ispai = (PC_SPAI *)pc->data; 113 114 PetscFunctionBegin; 115 /* Now using PETSc's multiply */ 116 PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 static PetscErrorCode PCDestroy_SPAI(PC pc) 121 { 122 PC_SPAI *ispai = (PC_SPAI *)pc->data; 123 124 PetscFunctionBegin; 125 PetscCall(MatDestroy(&ispai->PM)); 126 PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai))); 127 PetscCall(PetscFree(pc->data)); 128 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL)); 129 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL)); 130 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL)); 131 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL)); 132 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL)); 133 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL)); 134 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", 
NULL)); 135 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL)); 136 PetscFunctionReturn(PETSC_SUCCESS); 137 } 138 139 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) 140 { 141 PC_SPAI *ispai = (PC_SPAI *)pc->data; 142 PetscBool iascii; 143 144 PetscFunctionBegin; 145 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 146 if (iascii) { 147 PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon)); 148 PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps)); 149 PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max)); 150 PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew)); 151 PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size)); 152 PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size)); 153 PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose)); 154 PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp)); 155 } 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) 160 { 161 PC_SPAI *ispai = (PC_SPAI *)pc->data; 162 163 PetscFunctionBegin; 164 ispai->epsilon = (double)epsilon1; 165 PetscFunctionReturn(PETSC_SUCCESS); 166 } 167 168 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) 169 { 170 PC_SPAI *ispai = (PC_SPAI *)pc->data; 171 172 PetscFunctionBegin; 173 ispai->nbsteps = (int)nbsteps1; 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 /* added 1/7/99 g.h. 
*/ 178 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) 179 { 180 PC_SPAI *ispai = (PC_SPAI *)pc->data; 181 182 PetscFunctionBegin; 183 ispai->max = (int)max1; 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) 188 { 189 PC_SPAI *ispai = (PC_SPAI *)pc->data; 190 191 PetscFunctionBegin; 192 ispai->maxnew = (int)maxnew1; 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) 197 { 198 PC_SPAI *ispai = (PC_SPAI *)pc->data; 199 200 PetscFunctionBegin; 201 ispai->block_size = (int)block_size1; 202 PetscFunctionReturn(PETSC_SUCCESS); 203 } 204 205 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) 206 { 207 PC_SPAI *ispai = (PC_SPAI *)pc->data; 208 209 PetscFunctionBegin; 210 ispai->cache_size = (int)cache_size; 211 PetscFunctionReturn(PETSC_SUCCESS); 212 } 213 214 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) 215 { 216 PC_SPAI *ispai = (PC_SPAI *)pc->data; 217 218 PetscFunctionBegin; 219 ispai->verbose = (int)verbose; 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) 224 { 225 PC_SPAI *ispai = (PC_SPAI *)pc->data; 226 227 PetscFunctionBegin; 228 ispai->sp = (int)sp; 229 PetscFunctionReturn(PETSC_SUCCESS); 230 } 231 232 /*@ 233 PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner 234 235 Input Parameters: 236 + pc - the preconditioner 237 - epsilon1 - epsilon (default .4) 238 239 Note: 240 Espilon must be between 0 and 1. It controls the 241 quality of the approximation of M to the inverse of 242 A. Higher values of epsilon lead to more work, more 243 fill, and usually better preconditioners. In many 244 cases the best choice of epsilon is the one that 245 divides the total solution time equally between the 246 preconditioner and the solver. 
247 248 Level: intermediate 249 250 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 251 @*/ 252 PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) 253 { 254 PetscFunctionBegin; 255 PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1)); 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 259 /*@ 260 PCSPAISetNBSteps - set maximum number of improvement steps per row in 261 the `PCSPAI` preconditioner 262 263 Input Parameters: 264 + pc - the preconditioner 265 - nbsteps1 - number of steps (default 5) 266 267 Note: 268 `PCSPAI` constructs to approximation to every column of 269 the exact inverse of A in a series of improvement 270 steps. The quality of the approximation is determined 271 by epsilon. If an approximation achieving an accuracy 272 of epsilon is not obtained after ns steps, SPAI simply 273 uses the best approximation constructed so far. 274 275 Level: intermediate 276 277 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()` 278 @*/ 279 PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) 280 { 281 PetscFunctionBegin; 282 PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1)); 283 PetscFunctionReturn(PETSC_SUCCESS); 284 } 285 286 /* added 1/7/99 g.h. 
*/ 287 /*@ 288 PCSPAISetMax - set the size of various working buffers in 289 the `PCSPAI` preconditioner 290 291 Input Parameters: 292 + pc - the preconditioner 293 - max1 - size (default is 5000) 294 295 Level: intermediate 296 297 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 298 @*/ 299 PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) 300 { 301 PetscFunctionBegin; 302 PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1)); 303 PetscFunctionReturn(PETSC_SUCCESS); 304 } 305 306 /*@ 307 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 308 in `PCSPAI` preconditioner 309 310 Input Parameters: 311 + pc - the preconditioner 312 - maxnew1 - maximum number (default 5) 313 314 Level: intermediate 315 316 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()` 317 @*/ 318 PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) 319 { 320 PetscFunctionBegin; 321 PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1)); 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@ 326 PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner 327 328 Input Parameters: 329 + pc - the preconditioner 330 - block_size1 - block size (default 1) 331 332 Notes: 333 A block 334 size of 1 treats A as a matrix of scalar elements. A 335 block size of s > 1 treats A as a matrix of sxs 336 blocks. A block size of 0 treats A as a matrix with 337 variable sized blocks, which are determined by 338 searching for dense square diagonal blocks in A. 339 This can be very effective for finite-element 340 matrices. 341 342 SPAI will convert A to block form, use a block 343 version of the preconditioner algorithm, and then 344 convert the result back to scalar form. 345 346 In many cases the a block-size parameter other than 1 347 can lead to very significant improvement in 348 performance. 
349 350 Level: intermediate 351 352 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 353 @*/ 354 PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) 355 { 356 PetscFunctionBegin; 357 PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1)); 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 361 /*@ 362 PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner 363 364 Input Parameters: 365 + pc - the preconditioner 366 - cache_size - cache size {0,1,2,3,4,5} (default 5) 367 368 Note: 369 `PCSPAI` uses a hash table to cache messages and avoid 370 redundant communication. If suggest always using 371 5. This parameter is irrelevant in the serial 372 version. 373 374 Level: intermediate 375 376 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 377 @*/ 378 PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) 379 { 380 PetscFunctionBegin; 381 PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size)); 382 PetscFunctionReturn(PETSC_SUCCESS); 383 } 384 385 /*@ 386 PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner 387 388 Input Parameters: 389 + pc - the preconditioner 390 - verbose - level (default 1) 391 392 Note: 393 print parameters, timings and matrix statistics 394 395 Level: intermediate 396 397 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 398 @*/ 399 PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) 400 { 401 PetscFunctionBegin; 402 PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose)); 403 PetscFunctionReturn(PETSC_SUCCESS); 404 } 405 406 /*@ 407 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner 408 409 Input Parameters: 410 + pc - the preconditioner 411 - sp - 0 or 1 412 413 Note: 414 If A has a symmetric nonzero pattern use -sp 1 to 415 improve performance by eliminating some communication 416 in the parallel version. 
Even if A does not have a 417 symmetric nonzero pattern -sp 1 may well lead to good 418 results, but the code will not follow the published 419 SPAI algorithm exactly. 420 421 Level: intermediate 422 423 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 424 @*/ 425 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) 426 { 427 PetscFunctionBegin; 428 PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp)); 429 PetscFunctionReturn(PETSC_SUCCESS); 430 } 431 432 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject) 433 { 434 PC_SPAI *ispai = (PC_SPAI *)pc->data; 435 int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp; 436 double epsilon1; 437 PetscBool flg; 438 439 PetscFunctionBegin; 440 PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options"); 441 PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg)); 442 if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1)); 443 PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg)); 444 if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1)); 445 /* added 1/7/99 g.h. 
*/ 446 PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg)); 447 if (flg) PetscCall(PCSPAISetMax(pc, max1)); 448 PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg)); 449 if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1)); 450 PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg)); 451 if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1)); 452 PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg)); 453 if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size)); 454 PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg)); 455 if (flg) PetscCall(PCSPAISetVerbose(pc, verbose)); 456 PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg)); 457 if (flg) PetscCall(PCSPAISetSp(pc, sp)); 458 PetscOptionsHeadEnd(); 459 PetscFunctionReturn(PETSC_SUCCESS); 460 } 461 462 /*MC 463 PCSPAI - Use the Sparse Approximate Inverse method 464 465 Options Database Keys: 466 + -pc_spai_epsilon <eps> - set tolerance 467 . -pc_spai_nbstep <n> - set nbsteps 468 . -pc_spai_max <m> - set max 469 . -pc_spai_max_new <m> - set maxnew 470 . -pc_spai_block_size <n> - set block size 471 . -pc_spai_cache_size <n> - set cache size 472 . -pc_spai_sp <m> - set sp 473 - -pc_spai_set_verbose <true,false> - verbose output 474 475 Level: beginner 476 477 Note: 478 This only works with `MATAIJ` matrices. 479 480 References: 481 . * - Grote and Barnard (SIAM J. Sci. 
Comput.; vol 18, nr 3)

.seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
          `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
          `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
M*/

/*
   PCCreate_SPAI - creates the PC_SPAI context, installs the PC operations,
   sets the default SPAI parameters, duplicates the communicator for SPAI's
   private use, and composes the PCSPAISetXXX setter methods.
*/
PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
{
  PC_SPAI *ispai;

  PetscFunctionBegin;
  PetscCall(PetscNew(&ispai)); /* zero-initialized, so B/BT/M/PM start NULL */
  pc->data = ispai;

  pc->ops->destroy         = PCDestroy_SPAI;
  pc->ops->apply           = PCApply_SPAI;
  pc->ops->matapply        = PCMatApply_SPAI;
  pc->ops->applyrichardson = 0;
  pc->ops->setup           = PCSetUp_SPAI;
  pc->ops->view            = PCView_SPAI;
  pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

  /* default SPAI parameters (see the PCSPAISetXXX man pages) */
  ispai->epsilon    = .4;
  ispai->nbsteps    = 5;
  ispai->max        = 5000;
  ispai->maxnew     = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose    = 0;

  ispai->sp = 1;
  /* SPAI gets its own communicator so its internal messages cannot collide with PETSc's */
  PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   ConvertMatToMatrix - converts a PETSc matrix A to an SPAI "matrix".

   A  supplies the row nonzero structure and values; AT (the transpose of A,
   or A itself when the pattern is symmetric) supplies the column structure.
   AT may be NULL, in which case no column structure is built.

   NOTE(review): the fields of the SPAI struct (mnls, start_indices, pe,
   block_sizes, lines) are allocated with plain malloc() because SPAI's
   sp_free_matrix() presumably releases them with free() — confirm against
   the SPAI sources before changing to PetscMalloc.
*/
static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
{
  matrix                  *M;
  int                      i, j, col;
  int                      row_indx;
  int                      len, pe, local_indx, start_indx;
  int                     *mapping;
  const int               *cols;
  const double            *vals;
  int                      n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt              size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n)); /* square matrix: rows == cols == n */
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required. commenting out
    PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  /* scalar (block size 1) SPAI matrix of global dimension n */
  M->n              = n;
  M->bs             = 1;
  M->max_block_size = 1;

  M->mnls          = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe            = (int *)malloc(sizeof(int) * n);
  M->block_sizes   = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  /* each process learns every process's local row count */
  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  /* prefix-sum the local sizes to get each process's first global row */
  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* record the owning process of every global row */
  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  /* second argument 1 requests column (transpose) storage as well */
  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe         = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /************** Set up the row structure *****************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart;
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************** Set up the column structure *****************/

  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure; values are not needed for columns) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   ConvertMatrixToMat - converts an SPAI matrix B to a PETSc matrix PB.
   This assumes that the SPAI matrix B is stored in
   COMPRESSED-ROW format.
*/
static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
{
  PetscMPIInt size, rank;
  int         m, n, M, N;
  int         d_nz, o_nz;
  int        *d_nnz, *o_nnz;
  int         i, k, global_row, global_col, first_diag_col, last_diag_col;
  PetscScalar val;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  m    = n = B->mnls[rank]; /* local rows == local cols (square layout) */
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateAIJ: count, per local row, how many
     entries fall inside the diagonal block (d_nnz) vs outside it (o_nnz) */
  PetscCall(PetscMalloc1(m, &d_nnz));
  PetscCall(PetscMalloc1(m, &o_nnz));
  for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i = 0; i < B->mnls[rank]; i++) {
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  PetscCall(MatCreate(comm, PB));
  PetscCall(MatSetSizes(*PB, m, n, M, N));
  PetscCall(MatSetType(*PB, MATAIJ));
  /* both preallocations are set; only the one matching the actual type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));

  /* copy the compressed-row entries into the PETSc matrix */
  for (i = 0; i < B->mnls[rank]; i++) {
    global_row = B->start_indices[rank] + i;
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val = B->lines->A[i][k];
      PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
    }
  }

  PetscCall(PetscFree(d_nnz));
  PetscCall(PetscFree(o_nnz));

  PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}