1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #if !defined(PETSC_SKIP_COMPLEX) 21 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 22 #endif 23 24 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 25 #include <../src/ksp/pc/impls/spai/petscspai.h> 26 27 /* 28 These are the SPAI include files 29 */ 30 EXTERN_C_BEGIN 31 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 32 #include <spai.h> 33 #include <matrix.h> 34 EXTERN_C_END 35 36 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **); 37 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *); 38 extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *); 39 extern PetscErrorCode MM_to_PETSC(char *, char *, char *); 40 41 typedef struct { 42 matrix *B; /* matrix in SPAI format */ 43 matrix *BT; /* transpose of matrix in SPAI format */ 44 matrix *M; /* the approximate inverse in SPAI format */ 45 46 Mat PM; /* the approximate inverse PETSc format */ 47 48 double epsilon; /* tolerance */ 49 int nbsteps; /* max number of "improvement" steps per line */ 50 int max; /* max dimensions of is_I, q, etc. */ 51 int maxnew; /* max number of new entries per step */ 52 int block_size; /* constant block size */ 53 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 54 int verbose; /* SPAI prints timing and statistics */ 55 56 int sp; /* symmetric nonzero pattern */ 57 MPI_Comm comm_spai; /* communicator to be used with spai */ 58 } PC_SPAI; 59 60 static PetscErrorCode PCSetUp_SPAI(PC pc) 61 { 62 PC_SPAI *ispai = (PC_SPAI *)pc->data; 63 Mat AT; 64 65 PetscFunctionBegin; 66 init_SPAI(); 67 68 if (ispai->sp) { 69 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B)); 70 } else { 71 /* Use the transpose to get the column nonzero structure. */ 72 PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT)); 73 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B)); 74 PetscCall(MatDestroy(&AT)); 75 } 76 77 /* Destroy the transpose */ 78 /* Don't know how to do it. PETSc developers? */ 79 80 /* construct SPAI preconditioner */ 81 /* FILE *messages */ /* file for warning messages */ 82 /* double epsilon */ /* tolerance */ 83 /* int nbsteps */ /* max number of "improvement" steps per line */ 84 /* int max */ /* max dimensions of is_I, q, etc. */ 85 /* int maxnew */ /* max number of new entries per step */ 86 /* int block_size */ /* block_size == 1 specifies scalar elements 87 block_size == n specifies nxn constant-block elements 88 block_size == 0 specifies variable-block elements */ 89 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 90 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 91 verbose == 1 prints timing and matrix statistics */ 92 93 PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose); 94 95 PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM)); 96 97 /* free the SPAI matrices */ 98 sp_free_matrix(ispai->B); 99 sp_free_matrix(ispai->M); 100 PetscFunctionReturn(PETSC_SUCCESS); 101 } 102 103 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) 104 { 105 PC_SPAI *ispai = (PC_SPAI *)pc->data; 106 107 PetscFunctionBegin; 108 /* Now using PETSc's multiply */ 109 PetscCall(MatMult(ispai->PM, xx, y)); 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) 114 { 115 PC_SPAI *ispai = (PC_SPAI *)pc->data; 116 117 PetscFunctionBegin; 118 /* Now using PETSc's multiply */ 119 PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode PCDestroy_SPAI(PC pc) 124 { 125 PC_SPAI *ispai = (PC_SPAI *)pc->data; 126 127 PetscFunctionBegin; 128 PetscCall(MatDestroy(&ispai->PM)); 129 PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai))); 130 PetscCall(PetscFree(pc->data)); 131 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL)); 132 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL)); 133 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL)); 134 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL)); 135 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL)); 136 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL)); 137 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL)); 138 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) 143 { 144 PC_SPAI *ispai = (PC_SPAI *)pc->data; 145 PetscBool iascii; 146 147 PetscFunctionBegin; 148 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 149 if (iascii) { 150 PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon)); 151 PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps)); 152 PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max)); 153 PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew)); 154 PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size)); 155 PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size)); 156 PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose)); 157 PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp)); 158 } 159 PetscFunctionReturn(PETSC_SUCCESS); 160 } 161 162 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) 163 { 164 PC_SPAI *ispai = (PC_SPAI *)pc->data; 165 166 PetscFunctionBegin; 167 ispai->epsilon = (double)epsilon1; 168 PetscFunctionReturn(PETSC_SUCCESS); 169 } 170 171 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) 172 { 173 PC_SPAI *ispai = (PC_SPAI *)pc->data; 174 175 PetscFunctionBegin; 176 ispai->nbsteps = (int)nbsteps1; 177 PetscFunctionReturn(PETSC_SUCCESS); 178 } 179 180 /* added 1/7/99 g.h. */ 181 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) 182 { 183 PC_SPAI *ispai = (PC_SPAI *)pc->data; 184 185 PetscFunctionBegin; 186 ispai->max = (int)max1; 187 PetscFunctionReturn(PETSC_SUCCESS); 188 } 189 190 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) 191 { 192 PC_SPAI *ispai = (PC_SPAI *)pc->data; 193 194 PetscFunctionBegin; 195 ispai->maxnew = (int)maxnew1; 196 PetscFunctionReturn(PETSC_SUCCESS); 197 } 198 199 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) 200 { 201 PC_SPAI *ispai = (PC_SPAI *)pc->data; 202 203 PetscFunctionBegin; 204 ispai->block_size = (int)block_size1; 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) 209 { 210 PC_SPAI *ispai = (PC_SPAI *)pc->data; 211 212 PetscFunctionBegin; 213 ispai->cache_size = (int)cache_size; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) 218 { 219 PC_SPAI *ispai = (PC_SPAI *)pc->data; 220 221 PetscFunctionBegin; 222 ispai->verbose = (int)verbose; 223 PetscFunctionReturn(PETSC_SUCCESS); 224 } 225 226 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) 227 { 228 PC_SPAI *ispai = (PC_SPAI *)pc->data; 229 230 PetscFunctionBegin; 231 ispai->sp = (int)sp; 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 /*@ 236 PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner 237 238 Input Parameters: 239 + pc - the preconditioner 240 - eps - epsilon (default .4) 241 242 Note: 243 Espilon must be between 0 and 1. It controls the 244 quality of the approximation of M to the inverse of 245 A. Higher values of epsilon lead to more work, more 246 fill, and usually better preconditioners. In many 247 cases the best choice of epsilon is the one that 248 divides the total solution time equally between the 249 preconditioner and the solver. 250 251 Level: intermediate 252 253 .seealso: `PCSPAI`, `PCSetType()` 254 @*/ 255 PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) 256 { 257 PetscFunctionBegin; 258 PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 /*@ 263 PCSPAISetNBSteps - set maximum number of improvement steps per row in 264 the `PCSPAI` preconditioner 265 266 Input Parameters: 267 + pc - the preconditioner 268 - n - number of steps (default 5) 269 270 Note: 271 `PCSPAI` constructs to approximation to every column of 272 the exact inverse of A in a series of improvement 273 steps. The quality of the approximation is determined 274 by epsilon. If an approximation achieving an accuracy 275 of epsilon is not obtained after ns steps, SPAI simply 276 uses the best approximation constructed so far. 277 278 Level: intermediate 279 280 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()` 281 @*/ 282 PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) 283 { 284 PetscFunctionBegin; 285 PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 /* added 1/7/99 g.h. */ 290 /*@ 291 PCSPAISetMax - set the size of various working buffers in 292 the `PCSPAI` preconditioner 293 294 Input Parameters: 295 + pc - the preconditioner 296 - n - size (default is 5000) 297 298 Level: intermediate 299 300 .seealso: `PCSPAI`, `PCSetType()` 301 @*/ 302 PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) 303 { 304 PetscFunctionBegin; 305 PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1)); 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 /*@ 310 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 311 in `PCSPAI` preconditioner 312 313 Input Parameters: 314 + pc - the preconditioner 315 - n - maximum number (default 5) 316 317 Level: intermediate 318 319 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()` 320 @*/ 321 PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) 322 { 323 PetscFunctionBegin; 324 PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1)); 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 /*@ 329 PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner 330 331 Input Parameters: 332 + pc - the preconditioner 333 - n - block size (default 1) 334 335 Notes: 336 A block 337 size of 1 treats A as a matrix of scalar elements. A 338 block size of s > 1 treats A as a matrix of sxs 339 blocks. A block size of 0 treats A as a matrix with 340 variable sized blocks, which are determined by 341 searching for dense square diagonal blocks in A. 342 This can be very effective for finite-element 343 matrices. 344 345 SPAI will convert A to block form, use a block 346 version of the preconditioner algorithm, and then 347 convert the result back to scalar form. 348 349 In many cases the a block-size parameter other than 1 350 can lead to very significant improvement in 351 performance. 352 353 Level: intermediate 354 355 .seealso: `PCSPAI`, `PCSetType()` 356 @*/ 357 PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) 358 { 359 PetscFunctionBegin; 360 PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1)); 361 PetscFunctionReturn(PETSC_SUCCESS); 362 } 363 364 /*@ 365 PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner 366 367 Input Parameters: 368 + pc - the preconditioner 369 - n - cache size {0,1,2,3,4,5} (default 5) 370 371 Note: 372 `PCSPAI` uses a hash table to cache messages and avoid 373 redundant communication. If suggest always using 374 5. This parameter is irrelevant in the serial 375 version. 376 377 Level: intermediate 378 379 .seealso: `PCSPAI`, `PCSetType()` 380 @*/ 381 PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) 382 { 383 PetscFunctionBegin; 384 PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size)); 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } 387 388 /*@ 389 PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner 390 391 Input Parameters: 392 + pc - the preconditioner 393 - n - level (default 1) 394 395 Note: 396 print parameters, timings and matrix statistics 397 398 Level: intermediate 399 400 .seealso: `PCSPAI`, `PCSetType()` 401 @*/ 402 PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) 403 { 404 PetscFunctionBegin; 405 PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose)); 406 PetscFunctionReturn(PETSC_SUCCESS); 407 } 408 409 /*@ 410 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner 411 412 Input Parameters: 413 + pc - the preconditioner 414 - n - 0 or 1 415 416 Note: 417 If A has a symmetric nonzero pattern use -sp 1 to 418 improve performance by eliminating some communication 419 in the parallel version. Even if A does not have a 420 symmetric nonzero pattern -sp 1 may well lead to good 421 results, but the code will not follow the published 422 SPAI algorithm exactly. 423 424 Level: intermediate 425 426 .seealso: `PCSPAI`, `PCSetType()` 427 @*/ 428 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) 429 { 430 PetscFunctionBegin; 431 PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject) 436 { 437 PC_SPAI *ispai = (PC_SPAI *)pc->data; 438 int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp; 439 double epsilon1; 440 PetscBool flg; 441 442 PetscFunctionBegin; 443 PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options"); 444 PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg)); 445 if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1)); 446 PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg)); 447 if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1)); 448 /* added 1/7/99 g.h. */ 449 PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg)); 450 if (flg) PetscCall(PCSPAISetMax(pc, max1)); 451 PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg)); 452 if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1)); 453 PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg)); 454 if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1)); 455 PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg)); 456 if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size)); 457 PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg)); 458 if (flg) PetscCall(PCSPAISetVerbose(pc, verbose)); 459 PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg)); 460 if (flg) PetscCall(PCSPAISetSp(pc, sp)); 461 PetscOptionsHeadEnd(); 462 PetscFunctionReturn(PETSC_SUCCESS); 463 } 464 465 /*MC 466 PCSPAI - Use the Sparse Approximate Inverse method 467 468 Options Database Keys: 469 + -pc_spai_epsilon <eps> - set tolerance 470 . -pc_spai_nbstep <n> - set nbsteps 471 . -pc_spai_max <m> - set max 472 . -pc_spai_max_new <m> - set maxnew 473 . -pc_spai_block_size <n> - set block size 474 . -pc_spai_cache_size <n> - set cache size 475 . -pc_spai_sp <m> - set sp 476 - -pc_spai_set_verbose <true,false> - verbose output 477 478 Level: beginner 479 480 Note: 481 This only works with `MATAIJ` matrices. 482 483 References: 484 . * - Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3) 485 486 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, 487 `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`, 488 `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()` 489 M*/ 490 491 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 492 { 493 PC_SPAI *ispai; 494 495 PetscFunctionBegin; 496 PetscCall(PetscNew(&ispai)); 497 pc->data = ispai; 498 499 pc->ops->destroy = PCDestroy_SPAI; 500 pc->ops->apply = PCApply_SPAI; 501 pc->ops->matapply = PCMatApply_SPAI; 502 pc->ops->applyrichardson = 0; 503 pc->ops->setup = PCSetUp_SPAI; 504 pc->ops->view = PCView_SPAI; 505 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 506 507 ispai->epsilon = .4; 508 ispai->nbsteps = 5; 509 ispai->max = 5000; 510 ispai->maxnew = 5; 511 ispai->block_size = 1; 512 ispai->cache_size = 5; 513 ispai->verbose = 0; 514 515 ispai->sp = 1; 516 PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai))); 517 518 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI)); 519 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI)); 520 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI)); 521 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI)); 522 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI)); 523 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI)); 524 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI)); 525 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 /* 530 Converts from a PETSc matrix to an SPAI matrix 531 */ 532 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B) 533 { 534 matrix *M; 535 int i, j, col; 536 int row_indx; 537 int len, pe, local_indx, start_indx; 538 int *mapping; 539 const int *cols; 540 const double *vals; 541 int n, mnl, nnl, nz, rstart, rend; 542 PetscMPIInt size, rank; 543 struct compressed_lines *rows; 544 545 PetscFunctionBegin; 546 PetscCallMPI(MPI_Comm_size(comm, &size)); 547 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 548 PetscCall(MatGetSize(A, &n, &n)); 549 PetscCall(MatGetLocalSize(A, &mnl, &nnl)); 550 551 /* 552 not sure why a barrier is required. commenting out 553 PetscCallMPI(MPI_Barrier(comm)); 554 */ 555 556 M = new_matrix((SPAI_Comm)comm); 557 558 M->n = n; 559 M->bs = 1; 560 M->max_block_size = 1; 561 562 M->mnls = (int *)malloc(sizeof(int) * size); 563 M->start_indices = (int *)malloc(sizeof(int) * size); 564 M->pe = (int *)malloc(sizeof(int) * n); 565 M->block_sizes = (int *)malloc(sizeof(int) * n); 566 for (i = 0; i < n; i++) M->block_sizes[i] = 1; 567 568 PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm)); 569 570 M->start_indices[0] = 0; 571 for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1]; 572 573 M->mnl = M->mnls[M->myid]; 574 M->my_start_index = M->start_indices[M->myid]; 575 576 for (i = 0; i < size; i++) { 577 start_indx = M->start_indices[i]; 578 for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i; 579 } 580 581 if (AT) { 582 M->lines = new_compressed_lines(M->mnls[rank], 1); 583 } else { 584 M->lines = new_compressed_lines(M->mnls[rank], 0); 585 } 586 587 rows = M->lines; 588 589 /* Determine the mapping from global indices to pointers */ 590 PetscCall(PetscMalloc1(M->n, &mapping)); 591 pe = 0; 592 local_indx = 0; 593 for (i = 0; i < M->n; i++) { 594 if (local_indx >= M->mnls[pe]) { 595 pe++; 596 local_indx = 0; 597 } 598 mapping[i] = local_indx + M->start_indices[pe]; 599 local_indx++; 600 } 601 602 /************** Set up the row structure *****************/ 603 604 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 605 for (i = rstart; i < rend; i++) { 606 row_indx = i - rstart; 607 PetscCall(MatGetRow(A, i, &nz, &cols, &vals)); 608 /* allocate buffers */ 609 rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 610 rows->A[row_indx] = (double *)malloc(nz * sizeof(double)); 611 /* copy the matrix */ 612 for (j = 0; j < nz; j++) { 613 col = cols[j]; 614 len = rows->len[row_indx]++; 615 616 rows->ptrs[row_indx][len] = mapping[col]; 617 rows->A[row_indx][len] = vals[j]; 618 } 619 rows->slen[row_indx] = rows->len[row_indx]; 620 621 PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals)); 622 } 623 624 /************** Set up the column structure *****************/ 625 626 if (AT) { 627 for (i = rstart; i < rend; i++) { 628 row_indx = i - rstart; 629 PetscCall(MatGetRow(AT, i, &nz, &cols, &vals)); 630 /* allocate buffers */ 631 rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 632 /* copy the matrix (i.e., the structure) */ 633 for (j = 0; j < nz; j++) { 634 col = cols[j]; 635 len = rows->rlen[row_indx]++; 636 637 rows->rptrs[row_indx][len] = mapping[col]; 638 } 639 PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals)); 640 } 641 } 642 643 PetscCall(PetscFree(mapping)); 644 645 order_pointers(M); 646 M->maxnz = calc_maxnz(M); 647 *B = M; 648 PetscFunctionReturn(PETSC_SUCCESS); 649 } 650 651 /* 652 Converts from an SPAI matrix B to a PETSc matrix PB. 653 This assumes that the SPAI matrix B is stored in 654 COMPRESSED-ROW format. 655 */ 656 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB) 657 { 658 PetscMPIInt size, rank; 659 int m, n, M, N; 660 int d_nz, o_nz; 661 int *d_nnz, *o_nnz; 662 int i, k, global_row, global_col, first_diag_col, last_diag_col; 663 PetscScalar val; 664 665 PetscFunctionBegin; 666 PetscCallMPI(MPI_Comm_size(comm, &size)); 667 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 668 669 m = n = B->mnls[rank]; 670 d_nz = o_nz = 0; 671 672 /* Determine preallocation for MatCreateAIJ */ 673 PetscCall(PetscMalloc1(m, &d_nnz)); 674 PetscCall(PetscMalloc1(m, &o_nnz)); 675 for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0; 676 first_diag_col = B->start_indices[rank]; 677 last_diag_col = first_diag_col + B->mnls[rank]; 678 for (i = 0; i < B->mnls[rank]; i++) { 679 for (k = 0; k < B->lines->len[i]; k++) { 680 global_col = B->lines->ptrs[i][k]; 681 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 682 else o_nnz[i]++; 683 } 684 } 685 686 M = N = B->n; 687 /* Here we only know how to create AIJ format */ 688 PetscCall(MatCreate(comm, PB)); 689 PetscCall(MatSetSizes(*PB, m, n, M, N)); 690 PetscCall(MatSetType(*PB, MATAIJ)); 691 PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz)); 692 PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz)); 693 694 for (i = 0; i < B->mnls[rank]; i++) { 695 global_row = B->start_indices[rank] + i; 696 for (k = 0; k < B->lines->len[i]; k++) { 697 global_col = B->lines->ptrs[i][k]; 698 699 val = B->lines->A[i][k]; 700 PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES)); 701 } 702 } 703 704 PetscCall(PetscFree(d_nnz)); 705 PetscCall(PetscFree(o_nnz)); 706 707 PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY)); 708 PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY)); 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 /* 713 Converts from an SPAI vector v to a PETSc vec Pv. 714 */ 715 PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv) 716 { 717 PetscMPIInt size, rank; 718 int m, M, i, *mnls, *start_indices, *global_indices; 719 720 PetscFunctionBegin; 721 PetscCallMPI(MPI_Comm_size(comm, &size)); 722 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 723 724 m = v->mnl; 725 M = v->n; 726 727 PetscCall(VecCreateMPI(comm, m, M, Pv)); 728 729 PetscCall(PetscMalloc1(size, &mnls)); 730 PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm)); 731 732 PetscCall(PetscMalloc1(size, &start_indices)); 733 734 start_indices[0] = 0; 735 for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1]; 736 737 PetscCall(PetscMalloc1(v->mnl, &global_indices)); 738 for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i; 739 740 PetscCall(PetscFree(mnls)); 741 PetscCall(PetscFree(start_indices)); 742 743 PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES)); 744 PetscCall(VecAssemblyBegin(*Pv)); 745 PetscCall(VecAssemblyEnd(*Pv)); 746 747 PetscCall(PetscFree(global_indices)); 748 PetscFunctionReturn(PETSC_SUCCESS); 749 } 750