1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 23 #include <../src/ksp/pc/impls/spai/petscspai.h> 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include <spai.h> 31 #include <matrix.h> 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *); 37 extern PetscErrorCode MM_to_PETSC(char *, char *, char *); 38 39 typedef struct { 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. */ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 static PetscErrorCode PCSetUp_SPAI(PC pc) 59 { 60 PC_SPAI *ispai = (PC_SPAI *)pc->data; 61 Mat AT; 62 63 PetscFunctionBegin; 64 init_SPAI(); 65 66 if (ispai->sp) { 67 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B)); 68 } else { 69 /* Use the transpose to get the column nonzero structure. */ 70 PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT)); 71 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B)); 72 PetscCall(MatDestroy(&AT)); 73 } 74 75 /* Destroy the transpose */ 76 /* Don't know how to do it. PETSc developers? */ 77 78 /* construct SPAI preconditioner */ 79 /* FILE *messages */ /* file for warning messages */ 80 /* double epsilon */ /* tolerance */ 81 /* int nbsteps */ /* max number of "improvement" steps per line */ 82 /* int max */ /* max dimensions of is_I, q, etc. */ 83 /* int maxnew */ /* max number of new entries per step */ 84 /* int block_size */ /* block_size == 1 specifies scalar elements 85 block_size == n specifies nxn constant-block elements 86 block_size == 0 specifies variable-block elements */ 87 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 88 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 89 verbose == 1 prints timing and matrix statistics */ 90 91 PetscCall(bspai(ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose)); 92 93 PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM)); 94 95 /* free the SPAI matrices */ 96 sp_free_matrix(ispai->B); 97 sp_free_matrix(ispai->M); 98 PetscFunctionReturn(0); 99 } 100 101 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) 102 { 103 PC_SPAI *ispai = (PC_SPAI *)pc->data; 104 105 PetscFunctionBegin; 106 /* Now using PETSc's multiply */ 107 PetscCall(MatMult(ispai->PM, xx, y)); 108 PetscFunctionReturn(0); 109 } 110 111 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) 112 { 113 PC_SPAI *ispai = (PC_SPAI *)pc->data; 114 115 PetscFunctionBegin; 116 /* Now using PETSc's multiply */ 117 PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 118 PetscFunctionReturn(0); 119 } 120 121 static PetscErrorCode PCDestroy_SPAI(PC pc) 122 { 123 PC_SPAI *ispai = (PC_SPAI *)pc->data; 124 125 PetscFunctionBegin; 126 PetscCall(MatDestroy(&ispai->PM)); 127 PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai))); 128 PetscCall(PetscFree(pc->data)); 129 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL)); 130 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL)); 131 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL)); 132 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL)); 133 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL)); 134 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL)); 135 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL)); 136 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL)); 137 PetscFunctionReturn(0); 138 } 139 140 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) 141 { 142 PC_SPAI *ispai = (PC_SPAI *)pc->data; 143 PetscBool iascii; 144 145 PetscFunctionBegin; 146 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 147 if (iascii) { 148 PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon)); 149 PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps)); 150 PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max)); 151 PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew)); 152 PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size)); 153 PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size)); 154 PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose)); 155 PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp)); 156 } 157 PetscFunctionReturn(0); 158 } 159 160 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) 161 { 162 PC_SPAI *ispai = (PC_SPAI *)pc->data; 163 164 PetscFunctionBegin; 165 ispai->epsilon = (double)epsilon1; 166 PetscFunctionReturn(0); 167 } 168 169 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) 170 { 171 PC_SPAI *ispai = (PC_SPAI *)pc->data; 172 173 PetscFunctionBegin; 174 ispai->nbsteps = (int)nbsteps1; 175 PetscFunctionReturn(0); 176 } 177 178 /* added 1/7/99 g.h. */ 179 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) 180 { 181 PC_SPAI *ispai = (PC_SPAI *)pc->data; 182 183 PetscFunctionBegin; 184 ispai->max = (int)max1; 185 PetscFunctionReturn(0); 186 } 187 188 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) 189 { 190 PC_SPAI *ispai = (PC_SPAI *)pc->data; 191 192 PetscFunctionBegin; 193 ispai->maxnew = (int)maxnew1; 194 PetscFunctionReturn(0); 195 } 196 197 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) 198 { 199 PC_SPAI *ispai = (PC_SPAI *)pc->data; 200 201 PetscFunctionBegin; 202 ispai->block_size = (int)block_size1; 203 PetscFunctionReturn(0); 204 } 205 206 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) 207 { 208 PC_SPAI *ispai = (PC_SPAI *)pc->data; 209 210 PetscFunctionBegin; 211 ispai->cache_size = (int)cache_size; 212 PetscFunctionReturn(0); 213 } 214 215 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) 216 { 217 PC_SPAI *ispai = (PC_SPAI *)pc->data; 218 219 PetscFunctionBegin; 220 ispai->verbose = (int)verbose; 221 PetscFunctionReturn(0); 222 } 223 224 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) 225 { 226 PC_SPAI *ispai = (PC_SPAI *)pc->data; 227 228 PetscFunctionBegin; 229 ispai->sp = (int)sp; 230 PetscFunctionReturn(0); 231 } 232 233 /*@ 234 PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner 235 236 Input Parameters: 237 + pc - the preconditioner 238 - eps - epsilon (default .4) 239 240 Note: 241 Espilon must be between 0 and 1. It controls the 242 quality of the approximation of M to the inverse of 243 A. Higher values of epsilon lead to more work, more 244 fill, and usually better preconditioners. In many 245 cases the best choice of epsilon is the one that 246 divides the total solution time equally between the 247 preconditioner and the solver. 248 249 Level: intermediate 250 251 .seealso: `PCSPAI`, `PCSetType()` 252 @*/ 253 PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) 254 { 255 PetscFunctionBegin; 256 PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1)); 257 PetscFunctionReturn(0); 258 } 259 260 /*@ 261 PCSPAISetNBSteps - set maximum number of improvement steps per row in 262 the `PCSPAI` preconditioner 263 264 Input Parameters: 265 + pc - the preconditioner 266 - n - number of steps (default 5) 267 268 Note: 269 `PCSPAI` constructs to approximation to every column of 270 the exact inverse of A in a series of improvement 271 steps. The quality of the approximation is determined 272 by epsilon. If an approximation achieving an accuracy 273 of epsilon is not obtained after ns steps, SPAI simply 274 uses the best approximation constructed so far. 275 276 Level: intermediate 277 278 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()` 279 @*/ 280 PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) 281 { 282 PetscFunctionBegin; 283 PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1)); 284 PetscFunctionReturn(0); 285 } 286 287 /* added 1/7/99 g.h. */ 288 /*@ 289 PCSPAISetMax - set the size of various working buffers in 290 the `PCSPAI` preconditioner 291 292 Input Parameters: 293 + pc - the preconditioner 294 - n - size (default is 5000) 295 296 Level: intermediate 297 298 .seealso: `PCSPAI`, `PCSetType()` 299 @*/ 300 PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) 301 { 302 PetscFunctionBegin; 303 PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1)); 304 PetscFunctionReturn(0); 305 } 306 307 /*@ 308 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 309 in `PCSPAI` preconditioner 310 311 Input Parameters: 312 + pc - the preconditioner 313 - n - maximum number (default 5) 314 315 Level: intermediate 316 317 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()` 318 @*/ 319 PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) 320 { 321 PetscFunctionBegin; 322 PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1)); 323 PetscFunctionReturn(0); 324 } 325 326 /*@ 327 PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner 328 329 Input Parameters: 330 + pc - the preconditioner 331 - n - block size (default 1) 332 333 Notes: 334 A block 335 size of 1 treats A as a matrix of scalar elements. A 336 block size of s > 1 treats A as a matrix of sxs 337 blocks. A block size of 0 treats A as a matrix with 338 variable sized blocks, which are determined by 339 searching for dense square diagonal blocks in A. 340 This can be very effective for finite-element 341 matrices. 342 343 SPAI will convert A to block form, use a block 344 version of the preconditioner algorithm, and then 345 convert the result back to scalar form. 346 347 In many cases the a block-size parameter other than 1 348 can lead to very significant improvement in 349 performance. 350 351 Level: intermediate 352 353 .seealso: `PCSPAI`, `PCSetType()` 354 @*/ 355 PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) 356 { 357 PetscFunctionBegin; 358 PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1)); 359 PetscFunctionReturn(0); 360 } 361 362 /*@ 363 PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner 364 365 Input Parameters: 366 + pc - the preconditioner 367 - n - cache size {0,1,2,3,4,5} (default 5) 368 369 Note: 370 `PCSPAI` uses a hash table to cache messages and avoid 371 redundant communication. If suggest always using 372 5. This parameter is irrelevant in the serial 373 version. 374 375 Level: intermediate 376 377 .seealso: `PCSPAI`, `PCSetType()` 378 @*/ 379 PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) 380 { 381 PetscFunctionBegin; 382 PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size)); 383 PetscFunctionReturn(0); 384 } 385 386 /*@ 387 PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner 388 389 Input Parameters: 390 + pc - the preconditioner 391 - n - level (default 1) 392 393 Note: 394 print parameters, timings and matrix statistics 395 396 Level: intermediate 397 398 .seealso: `PCSPAI`, `PCSetType()` 399 @*/ 400 PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) 401 { 402 PetscFunctionBegin; 403 PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose)); 404 PetscFunctionReturn(0); 405 } 406 407 /*@ 408 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner 409 410 Input Parameters: 411 + pc - the preconditioner 412 - n - 0 or 1 413 414 Note: 415 If A has a symmetric nonzero pattern use -sp 1 to 416 improve performance by eliminating some communication 417 in the parallel version. Even if A does not have a 418 symmetric nonzero pattern -sp 1 may well lead to good 419 results, but the code will not follow the published 420 SPAI algorithm exactly. 421 422 Level: intermediate 423 424 .seealso: `PCSPAI`, `PCSetType()` 425 @*/ 426 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) 427 { 428 PetscFunctionBegin; 429 PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp)); 430 PetscFunctionReturn(0); 431 } 432 433 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject) 434 { 435 PC_SPAI *ispai = (PC_SPAI *)pc->data; 436 int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp; 437 double epsilon1; 438 PetscBool flg; 439 440 PetscFunctionBegin; 441 PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options"); 442 PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg)); 443 if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1)); 444 PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg)); 445 if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1)); 446 /* added 1/7/99 g.h. */ 447 PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg)); 448 if (flg) PetscCall(PCSPAISetMax(pc, max1)); 449 PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg)); 450 if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1)); 451 PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg)); 452 if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1)); 453 PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg)); 454 if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size)); 455 PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg)); 456 if (flg) PetscCall(PCSPAISetVerbose(pc, verbose)); 457 PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg)); 458 if (flg) PetscCall(PCSPAISetSp(pc, sp)); 459 PetscOptionsHeadEnd(); 460 PetscFunctionReturn(0); 461 } 462 463 /*MC 464 PCSPAI - Use the Sparse Approximate Inverse method 465 466 Options Database Keys: 467 + -pc_spai_epsilon <eps> - set tolerance 468 . -pc_spai_nbstep <n> - set nbsteps 469 . -pc_spai_max <m> - set max 470 . -pc_spai_max_new <m> - set maxnew 471 . -pc_spai_block_size <n> - set block size 472 . -pc_spai_cache_size <n> - set cache size 473 . -pc_spai_sp <m> - set sp 474 - -pc_spai_set_verbose <true,false> - verbose output 475 476 Level: beginner 477 478 Note: 479 This only works with `MATAIJ` matrices. 480 481 References: 482 . * - Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3) 483 484 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, 485 `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`, 486 `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()` 487 M*/ 488 489 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 490 { 491 PC_SPAI *ispai; 492 493 PetscFunctionBegin; 494 PetscCall(PetscNew(&ispai)); 495 pc->data = ispai; 496 497 pc->ops->destroy = PCDestroy_SPAI; 498 pc->ops->apply = PCApply_SPAI; 499 pc->ops->matapply = PCMatApply_SPAI; 500 pc->ops->applyrichardson = 0; 501 pc->ops->setup = PCSetUp_SPAI; 502 pc->ops->view = PCView_SPAI; 503 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 504 505 ispai->epsilon = .4; 506 ispai->nbsteps = 5; 507 ispai->max = 5000; 508 ispai->maxnew = 5; 509 ispai->block_size = 1; 510 ispai->cache_size = 5; 511 ispai->verbose = 0; 512 513 ispai->sp = 1; 514 PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai))); 515 516 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI)); 517 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI)); 518 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI)); 519 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI)); 520 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI)); 521 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI)); 522 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI)); 523 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI)); 524 PetscFunctionReturn(0); 525 } 526 527 /* 528 Converts from a PETSc matrix to an SPAI matrix 529 */ 530 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B) 531 { 532 matrix *M; 533 int i, j, col; 534 int row_indx; 535 int len, pe, local_indx, start_indx; 536 int *mapping; 537 const int *cols; 538 const double *vals; 539 int n, mnl, nnl, nz, rstart, rend; 540 PetscMPIInt size, rank; 541 struct compressed_lines *rows; 542 543 PetscFunctionBegin; 544 PetscCallMPI(MPI_Comm_size(comm, &size)); 545 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 546 PetscCall(MatGetSize(A, &n, &n)); 547 PetscCall(MatGetLocalSize(A, &mnl, &nnl)); 548 549 /* 550 not sure why a barrier is required. commenting out 551 PetscCallMPI(MPI_Barrier(comm)); 552 */ 553 554 M = new_matrix((SPAI_Comm)comm); 555 556 M->n = n; 557 M->bs = 1; 558 M->max_block_size = 1; 559 560 M->mnls = (int *)malloc(sizeof(int) * size); 561 M->start_indices = (int *)malloc(sizeof(int) * size); 562 M->pe = (int *)malloc(sizeof(int) * n); 563 M->block_sizes = (int *)malloc(sizeof(int) * n); 564 for (i = 0; i < n; i++) M->block_sizes[i] = 1; 565 566 PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm)); 567 568 M->start_indices[0] = 0; 569 for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1]; 570 571 M->mnl = M->mnls[M->myid]; 572 M->my_start_index = M->start_indices[M->myid]; 573 574 for (i = 0; i < size; i++) { 575 start_indx = M->start_indices[i]; 576 for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i; 577 } 578 579 if (AT) { 580 M->lines = new_compressed_lines(M->mnls[rank], 1); 581 } else { 582 M->lines = new_compressed_lines(M->mnls[rank], 0); 583 } 584 585 rows = M->lines; 586 587 /* Determine the mapping from global indices to pointers */ 588 PetscCall(PetscMalloc1(M->n, &mapping)); 589 pe = 0; 590 local_indx = 0; 591 for (i = 0; i < M->n; i++) { 592 if (local_indx >= M->mnls[pe]) { 593 pe++; 594 local_indx = 0; 595 } 596 mapping[i] = local_indx + M->start_indices[pe]; 597 local_indx++; 598 } 599 600 /************** Set up the row structure *****************/ 601 602 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 603 for (i = rstart; i < rend; i++) { 604 row_indx = i - rstart; 605 PetscCall(MatGetRow(A, i, &nz, &cols, &vals)); 606 /* allocate buffers */ 607 rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 608 rows->A[row_indx] = (double *)malloc(nz * sizeof(double)); 609 /* copy the matrix */ 610 for (j = 0; j < nz; j++) { 611 col = cols[j]; 612 len = rows->len[row_indx]++; 613 614 rows->ptrs[row_indx][len] = mapping[col]; 615 rows->A[row_indx][len] = vals[j]; 616 } 617 rows->slen[row_indx] = rows->len[row_indx]; 618 619 PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals)); 620 } 621 622 /************** Set up the column structure *****************/ 623 624 if (AT) { 625 for (i = rstart; i < rend; i++) { 626 row_indx = i - rstart; 627 PetscCall(MatGetRow(AT, i, &nz, &cols, &vals)); 628 /* allocate buffers */ 629 rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 630 /* copy the matrix (i.e., the structure) */ 631 for (j = 0; j < nz; j++) { 632 col = cols[j]; 633 len = rows->rlen[row_indx]++; 634 635 rows->rptrs[row_indx][len] = mapping[col]; 636 } 637 PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals)); 638 } 639 } 640 641 PetscCall(PetscFree(mapping)); 642 643 order_pointers(M); 644 M->maxnz = calc_maxnz(M); 645 *B = M; 646 PetscFunctionReturn(0); 647 } 648 649 /* 650 Converts from an SPAI matrix B to a PETSc matrix PB. 651 This assumes that the SPAI matrix B is stored in 652 COMPRESSED-ROW format. 653 */ 654 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB) 655 { 656 PetscMPIInt size, rank; 657 int m, n, M, N; 658 int d_nz, o_nz; 659 int *d_nnz, *o_nnz; 660 int i, k, global_row, global_col, first_diag_col, last_diag_col; 661 PetscScalar val; 662 663 PetscFunctionBegin; 664 PetscCallMPI(MPI_Comm_size(comm, &size)); 665 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 666 667 m = n = B->mnls[rank]; 668 d_nz = o_nz = 0; 669 670 /* Determine preallocation for MatCreateAIJ */ 671 PetscCall(PetscMalloc1(m, &d_nnz)); 672 PetscCall(PetscMalloc1(m, &o_nnz)); 673 for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0; 674 first_diag_col = B->start_indices[rank]; 675 last_diag_col = first_diag_col + B->mnls[rank]; 676 for (i = 0; i < B->mnls[rank]; i++) { 677 for (k = 0; k < B->lines->len[i]; k++) { 678 global_col = B->lines->ptrs[i][k]; 679 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 680 else o_nnz[i]++; 681 } 682 } 683 684 M = N = B->n; 685 /* Here we only know how to create AIJ format */ 686 PetscCall(MatCreate(comm, PB)); 687 PetscCall(MatSetSizes(*PB, m, n, M, N)); 688 PetscCall(MatSetType(*PB, MATAIJ)); 689 PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz)); 690 PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz)); 691 692 for (i = 0; i < B->mnls[rank]; i++) { 693 global_row = B->start_indices[rank] + i; 694 for (k = 0; k < B->lines->len[i]; k++) { 695 global_col = B->lines->ptrs[i][k]; 696 697 val = B->lines->A[i][k]; 698 PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES)); 699 } 700 } 701 702 PetscCall(PetscFree(d_nnz)); 703 PetscCall(PetscFree(o_nnz)); 704 705 PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY)); 706 PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY)); 707 PetscFunctionReturn(0); 708 } 709 710 /* 711 Converts from an SPAI vector v to a PETSc vec Pv. 712 */ 713 PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv) 714 { 715 PetscMPIInt size, rank; 716 int m, M, i, *mnls, *start_indices, *global_indices; 717 718 PetscFunctionBegin; 719 PetscCallMPI(MPI_Comm_size(comm, &size)); 720 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 721 722 m = v->mnl; 723 M = v->n; 724 725 PetscCall(VecCreateMPI(comm, m, M, Pv)); 726 727 PetscCall(PetscMalloc1(size, &mnls)); 728 PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm)); 729 730 PetscCall(PetscMalloc1(size, &start_indices)); 731 732 start_indices[0] = 0; 733 for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1]; 734 735 PetscCall(PetscMalloc1(v->mnl, &global_indices)); 736 for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i; 737 738 PetscCall(PetscFree(mnls)); 739 PetscCall(PetscFree(start_indices)); 740 741 PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES)); 742 PetscCall(VecAssemblyBegin(*Pv)); 743 PetscCall(VecAssemblyEnd(*Pv)); 744 745 PetscCall(PetscFree(global_indices)); 746 PetscFunctionReturn(0); 747 } 748