1 /* 2 3/99 Modified by Stephen Barnard to support SPAI version 3.0 3 */ 4 5 /* 6 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 7 Code written by Stephen Barnard. 8 9 Note: there is some BAD memory bleeding below! 10 11 This code needs work 12 13 1) get rid of all memory bleeding 14 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 15 rather than having the sp flag for PC_SPAI 16 3) fix to set the block size based on the matrix block size 17 18 */ 19 #if !defined(PETSC_SKIP_COMPLEX) 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 #endif 22 23 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 24 #include <../src/ksp/pc/impls/spai/petscspai.h> 25 26 /* 27 These are the SPAI include files 28 */ 29 EXTERN_C_BEGIN 30 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 31 #include <spai.h> 32 #include <matrix.h> 33 EXTERN_C_END 34 35 static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **); 36 static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *); 37 38 typedef struct { 39 matrix *B; /* matrix in SPAI format */ 40 matrix *BT; /* transpose of matrix in SPAI format */ 41 matrix *M; /* the approximate inverse in SPAI format */ 42 43 Mat PM; /* the approximate inverse PETSc format */ 44 45 double epsilon; /* tolerance */ 46 int nbsteps; /* max number of "improvement" steps per line */ 47 int max; /* max dimensions of is_I, q, etc. */ 48 int maxnew; /* max number of new entries per step */ 49 int block_size; /* constant block size */ 50 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 51 int verbose; /* SPAI prints timing and statistics */ 52 53 int sp; /* symmetric nonzero pattern */ 54 MPI_Comm comm_spai; /* communicator to be used with spai */ 55 } PC_SPAI; 56 57 static PetscErrorCode PCSetUp_SPAI(PC pc) 58 { 59 PC_SPAI *ispai = (PC_SPAI *)pc->data; 60 Mat AT; 61 62 PetscFunctionBegin; 63 init_SPAI(); 64 65 if (ispai->sp) { 66 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B)); 67 } else { 68 /* Use the transpose to get the column nonzero structure. */ 69 PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT)); 70 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B)); 71 PetscCall(MatDestroy(&AT)); 72 } 73 74 /* Destroy the transpose */ 75 /* Don't know how to do it. PETSc developers? */ 76 77 /* construct SPAI preconditioner */ 78 /* FILE *messages */ /* file for warning messages */ 79 /* double epsilon */ /* tolerance */ 80 /* int nbsteps */ /* max number of "improvement" steps per line */ 81 /* int max */ /* max dimensions of is_I, q, etc. */ 82 /* int maxnew */ /* max number of new entries per step */ 83 /* int block_size */ /* block_size == 1 specifies scalar elements 84 block_size == n specifies nxn constant-block elements 85 block_size == 0 specifies variable-block elements */ 86 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 87 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 88 verbose == 1 prints timing and matrix statistics */ 89 90 PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose); 91 92 PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM)); 93 94 /* free the SPAI matrices */ 95 sp_free_matrix(ispai->B); 96 sp_free_matrix(ispai->M); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) 101 { 102 PC_SPAI *ispai = (PC_SPAI *)pc->data; 103 104 PetscFunctionBegin; 105 /* Now using PETSc's multiply */ 106 PetscCall(MatMult(ispai->PM, xx, y)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) 111 { 112 PC_SPAI *ispai = (PC_SPAI *)pc->data; 113 114 PetscFunctionBegin; 115 /* Now using PETSc's multiply */ 116 PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 static PetscErrorCode PCDestroy_SPAI(PC pc) 121 { 122 PC_SPAI *ispai = (PC_SPAI *)pc->data; 123 124 PetscFunctionBegin; 125 PetscCall(MatDestroy(&ispai->PM)); 126 PetscCallMPI(MPI_Comm_free(&ispai->comm_spai)); 127 PetscCall(PetscFree(pc->data)); 128 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL)); 129 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL)); 130 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL)); 131 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL)); 132 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL)); 133 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL)); 134 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL)); 135 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL)); 136 PetscFunctionReturn(PETSC_SUCCESS); 137 } 138 139 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) 140 { 141 PC_SPAI *ispai = (PC_SPAI *)pc->data; 142 PetscBool isascii; 143 144 PetscFunctionBegin; 145 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 146 if (isascii) { 147 PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon)); 148 PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps)); 149 PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max)); 150 PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew)); 151 PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size)); 152 PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size)); 153 PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose)); 154 PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp)); 155 } 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) 160 { 161 PC_SPAI *ispai = (PC_SPAI *)pc->data; 162 163 PetscFunctionBegin; 164 ispai->epsilon = (double)epsilon1; 165 PetscFunctionReturn(PETSC_SUCCESS); 166 } 167 168 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) 169 { 170 PC_SPAI *ispai = (PC_SPAI *)pc->data; 171 172 PetscFunctionBegin; 173 ispai->nbsteps = (int)nbsteps1; 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 /* added 1/7/99 g.h. */ 178 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) 179 { 180 PC_SPAI *ispai = (PC_SPAI *)pc->data; 181 182 PetscFunctionBegin; 183 ispai->max = (int)max1; 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) 188 { 189 PC_SPAI *ispai = (PC_SPAI *)pc->data; 190 191 PetscFunctionBegin; 192 ispai->maxnew = (int)maxnew1; 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) 197 { 198 PC_SPAI *ispai = (PC_SPAI *)pc->data; 199 200 PetscFunctionBegin; 201 ispai->block_size = (int)block_size1; 202 PetscFunctionReturn(PETSC_SUCCESS); 203 } 204 205 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) 206 { 207 PC_SPAI *ispai = (PC_SPAI *)pc->data; 208 209 PetscFunctionBegin; 210 ispai->cache_size = (int)cache_size; 211 PetscFunctionReturn(PETSC_SUCCESS); 212 } 213 214 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) 215 { 216 PC_SPAI *ispai = (PC_SPAI *)pc->data; 217 218 PetscFunctionBegin; 219 ispai->verbose = (int)verbose; 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) 224 { 225 PC_SPAI *ispai = (PC_SPAI *)pc->data; 226 227 PetscFunctionBegin; 228 ispai->sp = (int)sp; 229 PetscFunctionReturn(PETSC_SUCCESS); 230 } 231 232 /*@ 233 PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner 234 235 Input Parameters: 236 + pc - the preconditioner 237 - epsilon1 - the tolerance (default .4) 238 239 Level: intermediate 240 241 Note: 242 `espilon1` must be between 0 and 1. It controls the 243 quality of the approximation of M to the inverse of 244 A. Higher values of `epsilon1` lead to more work, more 245 fill, and usually better preconditioners. In many 246 cases the best choice of `epsilon1` is the one that 247 divides the total solution time equally between the 248 preconditioner and the solver. 249 250 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 251 @*/ 252 PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) 253 { 254 PetscFunctionBegin; 255 PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1)); 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 259 /*@ 260 PCSPAISetNBSteps - set maximum number of improvement steps per row in 261 the `PCSPAI` preconditioner 262 263 Input Parameters: 264 + pc - the preconditioner 265 - nbsteps1 - number of steps (default 5) 266 267 Note: 268 `PCSPAI` constructs to approximation to every column of 269 the exact inverse of A in a series of improvement 270 steps. The quality of the approximation is determined 271 by epsilon. If an approximation achieving an accuracy 272 of epsilon is not obtained after `nbsteps1` steps, `PCSPAI` simply 273 uses the best approximation constructed so far. 274 275 Level: intermediate 276 277 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()` 278 @*/ 279 PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) 280 { 281 PetscFunctionBegin; 282 PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1)); 283 PetscFunctionReturn(PETSC_SUCCESS); 284 } 285 286 /* added 1/7/99 g.h. */ 287 /*@ 288 PCSPAISetMax - set the size of various working buffers in the `PCSPAI` preconditioner 289 290 Input Parameters: 291 + pc - the preconditioner 292 - max1 - size (default is 5000) 293 294 Level: intermediate 295 296 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 297 @*/ 298 PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) 299 { 300 PetscFunctionBegin; 301 PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1)); 302 PetscFunctionReturn(PETSC_SUCCESS); 303 } 304 305 /*@ 306 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step in the `PCSPAI` preconditioner 307 308 Input Parameters: 309 + pc - the preconditioner 310 - maxnew1 - maximum number (default 5) 311 312 Level: intermediate 313 314 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()` 315 @*/ 316 PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) 317 { 318 PetscFunctionBegin; 319 PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1)); 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 /*@ 324 PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner 325 326 Input Parameters: 327 + pc - the preconditioner 328 - block_size1 - block size (default 1) 329 330 Level: intermediate 331 332 Notes: 333 A block 334 size of 1 treats A as a matrix of scalar elements. A 335 block size of s > 1 treats A as a matrix of sxs 336 blocks. A block size of 0 treats A as a matrix with 337 variable sized blocks, which are determined by 338 searching for dense square diagonal blocks in A. 339 This can be very effective for finite-element 340 matrices. 341 342 SPAI will convert A to block form, use a block 343 version of the preconditioner algorithm, and then 344 convert the result back to scalar form. 345 346 In many cases the a block-size parameter other than 1 347 can lead to very significant improvement in 348 performance. 349 350 Developer Note: 351 This preconditioner could use the matrix block size as the default block size to use 352 353 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 354 @*/ 355 PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) 356 { 357 PetscFunctionBegin; 358 PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /*@ 363 PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner 364 365 Input Parameters: 366 + pc - the preconditioner 367 - cache_size - cache size {0,1,2,3,4,5} (default 5) 368 369 Level: intermediate 370 371 Note: 372 `PCSPAI` uses a hash table to cache messages and avoid 373 redundant communication. If suggest always using 374 5. This parameter is irrelevant in the serial 375 version. 376 377 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 378 @*/ 379 PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) 380 { 381 PetscFunctionBegin; 382 PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size)); 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 /*@ 387 PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner 388 389 Input Parameters: 390 + pc - the preconditioner 391 - verbose - level (default 1) 392 393 Level: intermediate 394 395 Note: 396 Prints parameters, timings and matrix statistics 397 398 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 399 @*/ 400 PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) 401 { 402 PetscFunctionBegin; 403 PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose)); 404 PetscFunctionReturn(PETSC_SUCCESS); 405 } 406 407 /*@ 408 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner 409 410 Input Parameters: 411 + pc - the preconditioner 412 - sp - 0 or 1 413 414 Level: intermediate 415 416 Note: 417 If A has a symmetric nonzero pattern use `sp` 1 to 418 improve performance by eliminating some communication 419 in the parallel version. Even if A does not have a 420 symmetric nonzero pattern `sp` 1 may well lead to good 421 results, but the code will not follow the published 422 SPAI algorithm exactly. 423 424 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()` 425 @*/ 426 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) 427 { 428 PetscFunctionBegin; 429 PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp)); 430 PetscFunctionReturn(PETSC_SUCCESS); 431 } 432 433 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems PetscOptionsObject) 434 { 435 PC_SPAI *ispai = (PC_SPAI *)pc->data; 436 int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp; 437 double epsilon1; 438 PetscBool flg; 439 440 PetscFunctionBegin; 441 PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options"); 442 PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg)); 443 if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1)); 444 PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg)); 445 if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1)); 446 /* added 1/7/99 g.h. */ 447 PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg)); 448 if (flg) PetscCall(PCSPAISetMax(pc, max1)); 449 PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg)); 450 if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1)); 451 PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg)); 452 if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1)); 453 PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg)); 454 if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size)); 455 PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg)); 456 if (flg) PetscCall(PCSPAISetVerbose(pc, verbose)); 457 PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg)); 458 if (flg) PetscCall(PCSPAISetSp(pc, sp)); 459 PetscOptionsHeadEnd(); 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 /*MC 464 PCSPAI - Use the Sparse Approximate Inverse method {cite}`gh97` 465 466 Options Database Keys: 467 + -pc_spai_epsilon <eps> - set tolerance 468 . -pc_spai_nbstep <n> - set nbsteps 469 . -pc_spai_max <m> - set max 470 . -pc_spai_max_new <m> - set maxnew 471 . -pc_spai_block_size <n> - set block size 472 . -pc_spai_cache_size <n> - set cache size 473 . -pc_spai_sp <m> - set sp 474 - -pc_spai_set_verbose <true,false> - verbose output 475 476 Level: beginner 477 478 Note: 479 This only works with `MATAIJ` matrices. 480 481 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, 482 `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`, 483 `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()` 484 M*/ 485 486 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 487 { 488 PC_SPAI *ispai; 489 490 PetscFunctionBegin; 491 PetscCall(PetscNew(&ispai)); 492 pc->data = ispai; 493 494 pc->ops->destroy = PCDestroy_SPAI; 495 pc->ops->apply = PCApply_SPAI; 496 pc->ops->matapply = PCMatApply_SPAI; 497 pc->ops->applyrichardson = 0; 498 pc->ops->setup = PCSetUp_SPAI; 499 pc->ops->view = PCView_SPAI; 500 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 501 502 ispai->epsilon = .4; 503 ispai->nbsteps = 5; 504 ispai->max = 5000; 505 ispai->maxnew = 5; 506 ispai->block_size = 1; 507 ispai->cache_size = 5; 508 ispai->verbose = 0; 509 510 ispai->sp = 1; 511 PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &ispai->comm_spai)); 512 513 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI)); 514 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI)); 515 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI)); 516 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI)); 517 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI)); 518 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI)); 519 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI)); 520 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI)); 521 PetscFunctionReturn(PETSC_SUCCESS); 522 } 523 524 /* 525 Converts from a PETSc matrix to an SPAI matrix 526 */ 527 static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B) 528 { 529 matrix *M; 530 int i, j, col; 531 int row_indx; 532 int len, pe, local_indx, start_indx; 533 int *mapping; 534 const int *cols; 535 const double *vals; 536 int n, mnl, nnl, nz, rstart, rend; 537 PetscMPIInt size, rank; 538 struct compressed_lines *rows; 539 540 PetscFunctionBegin; 541 PetscCallMPI(MPI_Comm_size(comm, &size)); 542 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 543 PetscCall(MatGetSize(A, &n, &n)); 544 PetscCall(MatGetLocalSize(A, &mnl, &nnl)); 545 546 /* 547 not sure why a barrier is required. commenting out 548 PetscCallMPI(MPI_Barrier(comm)); 549 */ 550 551 M = new_matrix((SPAI_Comm)comm); 552 553 M->n = n; 554 M->bs = 1; 555 M->max_block_size = 1; 556 557 M->mnls = (int *)malloc(sizeof(int) * size); 558 M->start_indices = (int *)malloc(sizeof(int) * size); 559 M->pe = (int *)malloc(sizeof(int) * n); 560 M->block_sizes = (int *)malloc(sizeof(int) * n); 561 for (i = 0; i < n; i++) M->block_sizes[i] = 1; 562 563 PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm)); 564 565 M->start_indices[0] = 0; 566 for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1]; 567 568 M->mnl = M->mnls[M->myid]; 569 M->my_start_index = M->start_indices[M->myid]; 570 571 for (i = 0; i < size; i++) { 572 start_indx = M->start_indices[i]; 573 for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i; 574 } 575 576 if (AT) { 577 M->lines = new_compressed_lines(M->mnls[rank], 1); 578 } else { 579 M->lines = new_compressed_lines(M->mnls[rank], 0); 580 } 581 582 rows = M->lines; 583 584 /* Determine the mapping from global indices to pointers */ 585 PetscCall(PetscMalloc1(M->n, &mapping)); 586 pe = 0; 587 local_indx = 0; 588 for (i = 0; i < M->n; i++) { 589 if (local_indx >= M->mnls[pe]) { 590 pe++; 591 local_indx = 0; 592 } 593 mapping[i] = local_indx + M->start_indices[pe]; 594 local_indx++; 595 } 596 597 /************** Set up the row structure *****************/ 598 599 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 600 for (i = rstart; i < rend; i++) { 601 row_indx = i - rstart; 602 PetscCall(MatGetRow(A, i, &nz, &cols, &vals)); 603 /* allocate buffers */ 604 rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 605 rows->A[row_indx] = (double *)malloc(nz * sizeof(double)); 606 /* copy the matrix */ 607 for (j = 0; j < nz; j++) { 608 col = cols[j]; 609 len = rows->len[row_indx]++; 610 611 rows->ptrs[row_indx][len] = mapping[col]; 612 rows->A[row_indx][len] = vals[j]; 613 } 614 rows->slen[row_indx] = rows->len[row_indx]; 615 616 PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals)); 617 } 618 619 /************** Set up the column structure *****************/ 620 621 if (AT) { 622 for (i = rstart; i < rend; i++) { 623 row_indx = i - rstart; 624 PetscCall(MatGetRow(AT, i, &nz, &cols, &vals)); 625 /* allocate buffers */ 626 rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 627 /* copy the matrix (i.e., the structure) */ 628 for (j = 0; j < nz; j++) { 629 col = cols[j]; 630 len = rows->rlen[row_indx]++; 631 632 rows->rptrs[row_indx][len] = mapping[col]; 633 } 634 PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals)); 635 } 636 } 637 638 PetscCall(PetscFree(mapping)); 639 640 order_pointers(M); 641 M->maxnz = calc_maxnz(M); 642 *B = M; 643 PetscFunctionReturn(PETSC_SUCCESS); 644 } 645 646 /* 647 Converts from an SPAI matrix B to a PETSc matrix PB. 648 This assumes that the SPAI matrix B is stored in 649 COMPRESSED-ROW format. 650 */ 651 static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB) 652 { 653 PetscMPIInt size, rank; 654 int m, n, M, N; 655 int d_nz, o_nz; 656 int *d_nnz, *o_nnz; 657 int i, k, global_row, global_col, first_diag_col, last_diag_col; 658 PetscScalar val; 659 660 PetscFunctionBegin; 661 PetscCallMPI(MPI_Comm_size(comm, &size)); 662 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 663 664 m = n = B->mnls[rank]; 665 d_nz = o_nz = 0; 666 667 /* Determine preallocation for MatCreateAIJ */ 668 PetscCall(PetscMalloc1(m, &d_nnz)); 669 PetscCall(PetscMalloc1(m, &o_nnz)); 670 for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0; 671 first_diag_col = B->start_indices[rank]; 672 last_diag_col = first_diag_col + B->mnls[rank]; 673 for (i = 0; i < B->mnls[rank]; i++) { 674 for (k = 0; k < B->lines->len[i]; k++) { 675 global_col = B->lines->ptrs[i][k]; 676 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 677 else o_nnz[i]++; 678 } 679 } 680 681 M = N = B->n; 682 /* Here we only know how to create AIJ format */ 683 PetscCall(MatCreate(comm, PB)); 684 PetscCall(MatSetSizes(*PB, m, n, M, N)); 685 PetscCall(MatSetType(*PB, MATAIJ)); 686 PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz)); 687 PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz)); 688 689 for (i = 0; i < B->mnls[rank]; i++) { 690 global_row = B->start_indices[rank] + i; 691 for (k = 0; k < B->lines->len[i]; k++) { 692 global_col = B->lines->ptrs[i][k]; 693 694 val = B->lines->A[i][k]; 695 PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES)); 696 } 697 } 698 PetscCall(PetscFree(d_nnz)); 699 PetscCall(PetscFree(o_nnz)); 700 701 PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY)); 702 PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY)); 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705