1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 23 #include <../src/ksp/pc/impls/spai/petscspai.h> 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include <spai.h> 31 #include <matrix.h> 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *); 37 extern PetscErrorCode MM_to_PETSC(char *, char *, char *); 38 39 typedef struct { 40 matrix *B; /* matrix in SPAI format */ 41 matrix *BT; /* transpose of matrix in SPAI format */ 42 matrix *M; /* the approximate inverse in SPAI format */ 43 44 Mat PM; /* the approximate inverse PETSc format */ 45 46 double epsilon; /* tolerance */ 47 int nbsteps; /* max number of "improvement" steps per line */ 48 int max; /* max dimensions of is_I, q, etc. 
*/ 49 int maxnew; /* max number of new entries per step */ 50 int block_size; /* constant block size */ 51 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 52 int verbose; /* SPAI prints timing and statistics */ 53 54 int sp; /* symmetric nonzero pattern */ 55 MPI_Comm comm_spai; /* communicator to be used with spai */ 56 } PC_SPAI; 57 58 static PetscErrorCode PCSetUp_SPAI(PC pc) { 59 PC_SPAI *ispai = (PC_SPAI *)pc->data; 60 Mat AT; 61 62 PetscFunctionBegin; 63 init_SPAI(); 64 65 if (ispai->sp) { 66 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B)); 67 } else { 68 /* Use the transpose to get the column nonzero structure. */ 69 PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT)); 70 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B)); 71 PetscCall(MatDestroy(&AT)); 72 } 73 74 /* Destroy the transpose */ 75 /* Don't know how to do it. PETSc developers? */ 76 77 /* construct SPAI preconditioner */ 78 /* FILE *messages */ /* file for warning messages */ 79 /* double epsilon */ /* tolerance */ 80 /* int nbsteps */ /* max number of "improvement" steps per line */ 81 /* int max */ /* max dimensions of is_I, q, etc. */ 82 /* int maxnew */ /* max number of new entries per step */ 83 /* int block_size */ /* block_size == 1 specifies scalar elments 84 block_size == n specifies nxn constant-block elements 85 block_size == 0 specifies variable-block elements */ 86 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. 
cache_size == 0 indicates no caching */ 87 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 88 verbose == 1 prints timing and matrix statistics */ 89 90 PetscCall(bspai(ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose)); 91 92 PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM)); 93 94 /* free the SPAI matrices */ 95 sp_free_matrix(ispai->B); 96 sp_free_matrix(ispai->M); 97 PetscFunctionReturn(0); 98 } 99 100 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) { 101 PC_SPAI *ispai = (PC_SPAI *)pc->data; 102 103 PetscFunctionBegin; 104 /* Now using PETSc's multiply */ 105 PetscCall(MatMult(ispai->PM, xx, y)); 106 PetscFunctionReturn(0); 107 } 108 109 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) { 110 PC_SPAI *ispai = (PC_SPAI *)pc->data; 111 112 PetscFunctionBegin; 113 /* Now using PETSc's multiply */ 114 PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 115 PetscFunctionReturn(0); 116 } 117 118 static PetscErrorCode PCDestroy_SPAI(PC pc) { 119 PC_SPAI *ispai = (PC_SPAI *)pc->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatDestroy(&ispai->PM)); 123 PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai))); 124 PetscCall(PetscFree(pc->data)); 125 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL)); 126 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL)); 127 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL)); 128 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL)); 129 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL)); 130 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL)); 131 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL)); 132 
PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL)); 133 PetscFunctionReturn(0); 134 } 135 136 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) { 137 PC_SPAI *ispai = (PC_SPAI *)pc->data; 138 PetscBool iascii; 139 140 PetscFunctionBegin; 141 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 142 if (iascii) { 143 PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon)); 144 PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps)); 145 PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max)); 146 PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew)); 147 PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size)); 148 PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size)); 149 PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose)); 150 PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp)); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) { 156 PC_SPAI *ispai = (PC_SPAI *)pc->data; 157 158 PetscFunctionBegin; 159 ispai->epsilon = (double)epsilon1; 160 PetscFunctionReturn(0); 161 } 162 163 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) { 164 PC_SPAI *ispai = (PC_SPAI *)pc->data; 165 166 PetscFunctionBegin; 167 ispai->nbsteps = (int)nbsteps1; 168 PetscFunctionReturn(0); 169 } 170 171 /* added 1/7/99 g.h. 
 */
/* Internal setter backing PCSPAISetMax() */
static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->max = (int)max1;
  PetscFunctionReturn(0);
}

/* Internal setter backing PCSPAISetMaxNew() */
static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->maxnew = (int)maxnew1;
  PetscFunctionReturn(0);
}

/* Internal setter backing PCSPAISetBlockSize() */
static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->block_size = (int)block_size1;
  PetscFunctionReturn(0);
}

/* Internal setter backing PCSPAISetCacheSize() */
static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->cache_size = (int)cache_size;
  PetscFunctionReturn(0);
}

/* Internal setter backing PCSPAISetVerbose() */
static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->verbose = (int)verbose;
  PetscFunctionReturn(0);
}

/* Internal setter backing PCSPAISetSp() */
static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->sp = (int)sp;
  PetscFunctionReturn(0);
}

/*@
   PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  eps - epsilon (default .4)

   Note:
   Epsilon must be between 0 and 1. It controls the
   quality of the approximation of M to the inverse of
   A. Higher values of epsilon lead to more work, more
   fill, and usually better preconditioners. In many
   cases the best choice of epsilon is the one that
   divides the total solution time equally between the
   preconditioner and the solver.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
  PetscFunctionReturn(0);
}

/*@
   PCSPAISetNBSteps - set maximum number of improvement steps per row in
   the `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - number of steps (default 5)

   Note:
   `PCSPAI` constructs an approximation to every column of
   the exact inverse of A in a series of improvement
   steps. The quality of the approximation is determined
   by epsilon. If an approximation achieving an accuracy
   of epsilon is not obtained after ns steps, SPAI simply
   uses the best approximation constructed so far.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
  PetscFunctionReturn(0);
}

/* added 1/7/99 g.h.
 */
/*@
   PCSPAISetMax - set the size of various working buffers in
   the `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - size (default is 5000)

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
  PetscFunctionReturn(0);
}

/*@
   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - maximum number (default 5)

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
  PetscFunctionReturn(0);
}

/*@
   PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - block size (default 1)

   Notes:
   A block
   size of 1 treats A as a matrix of scalar elements. A
   block size of s > 1 treats A as a matrix of sxs
   blocks. A block size of 0 treats A as a matrix with
   variable sized blocks, which are determined by
   searching for dense square diagonal blocks in A.
   This can be very effective for finite-element
   matrices.

   SPAI will convert A to block form, use a block
   version of the preconditioner algorithm, and then
   convert the result back to scalar form.

   In many cases a block-size parameter other than 1
   can lead to very significant improvement in
   performance.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
  PetscFunctionReturn(0);
}

/*@
   PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - cache size {0,1,2,3,4,5} (default 5)

   Note:
   `PCSPAI` uses a hash table to cache messages and avoid
   redundant communication. We suggest always using
   5. This parameter is irrelevant in the serial
   version.

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
  PetscFunctionReturn(0);
}

/*@
   PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - level (default 1)

   Note:
   print parameters, timings and matrix statistics

   Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
  PetscFunctionReturn(0);
}

/*@
   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner

   Input Parameters:
+  pc - the preconditioner
-  n - 0 or 1

   Note:
   If A has a symmetric nonzero pattern use -sp 1 to
   improve performance by eliminating some communication
   in the parallel version. Even if A does not have a
   symmetric nonzero pattern -sp 1 may well lead to good
   results, but the code will not follow the published
   SPAI algorithm exactly.
401 402 Level: intermediate 403 404 .seealso: `PCSPAI`, `PCSetType()` 405 @*/ 406 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) { 407 PetscFunctionBegin; 408 PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp)); 409 PetscFunctionReturn(0); 410 } 411 412 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject) { 413 PC_SPAI *ispai = (PC_SPAI *)pc->data; 414 int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp; 415 double epsilon1; 416 PetscBool flg; 417 418 PetscFunctionBegin; 419 PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options"); 420 PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg)); 421 if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1)); 422 PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg)); 423 if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1)); 424 /* added 1/7/99 g.h. */ 425 PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg)); 426 if (flg) PetscCall(PCSPAISetMax(pc, max1)); 427 PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg)); 428 if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1)); 429 PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg)); 430 if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1)); 431 PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg)); 432 if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size)); 433 PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg)); 434 if (flg) PetscCall(PCSPAISetVerbose(pc, verbose)); 435 PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg)); 436 if (flg) PetscCall(PCSPAISetSp(pc, sp)); 437 PetscOptionsHeadEnd(); 438 PetscFunctionReturn(0); 439 } 440 441 /*MC 442 PCSPAI 
- Use the Sparse Approximate Inverse method 443 444 Options Database Keys: 445 + -pc_spai_epsilon <eps> - set tolerance 446 . -pc_spai_nbstep <n> - set nbsteps 447 . -pc_spai_max <m> - set max 448 . -pc_spai_max_new <m> - set maxnew 449 . -pc_spai_block_size <n> - set block size 450 . -pc_spai_cache_size <n> - set cache size 451 . -pc_spai_sp <m> - set sp 452 - -pc_spai_set_verbose <true,false> - verbose output 453 454 Level: beginner 455 456 Note: 457 This only works with `MATAIJ` matrices. 458 459 References: 460 . * - Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3) 461 462 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, 463 `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`, 464 `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()` 465 M*/ 466 467 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) { 468 PC_SPAI *ispai; 469 470 PetscFunctionBegin; 471 PetscCall(PetscNew(&ispai)); 472 pc->data = ispai; 473 474 pc->ops->destroy = PCDestroy_SPAI; 475 pc->ops->apply = PCApply_SPAI; 476 pc->ops->matapply = PCMatApply_SPAI; 477 pc->ops->applyrichardson = 0; 478 pc->ops->setup = PCSetUp_SPAI; 479 pc->ops->view = PCView_SPAI; 480 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 481 482 ispai->epsilon = .4; 483 ispai->nbsteps = 5; 484 ispai->max = 5000; 485 ispai->maxnew = 5; 486 ispai->block_size = 1; 487 ispai->cache_size = 5; 488 ispai->verbose = 0; 489 490 ispai->sp = 1; 491 PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai))); 492 493 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI)); 494 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI)); 495 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI)); 496 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI)); 497 
PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI)); 498 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI)); 499 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI)); 500 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI)); 501 PetscFunctionReturn(0); 502 } 503 504 /* 505 Converts from a PETSc matrix to an SPAI matrix 506 */ 507 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B) { 508 matrix *M; 509 int i, j, col; 510 int row_indx; 511 int len, pe, local_indx, start_indx; 512 int *mapping; 513 const int *cols; 514 const double *vals; 515 int n, mnl, nnl, nz, rstart, rend; 516 PetscMPIInt size, rank; 517 struct compressed_lines *rows; 518 519 PetscFunctionBegin; 520 PetscCallMPI(MPI_Comm_size(comm, &size)); 521 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 522 PetscCall(MatGetSize(A, &n, &n)); 523 PetscCall(MatGetLocalSize(A, &mnl, &nnl)); 524 525 /* 526 not sure why a barrier is required. 
commenting out 527 PetscCallMPI(MPI_Barrier(comm)); 528 */ 529 530 M = new_matrix((SPAI_Comm)comm); 531 532 M->n = n; 533 M->bs = 1; 534 M->max_block_size = 1; 535 536 M->mnls = (int *)malloc(sizeof(int) * size); 537 M->start_indices = (int *)malloc(sizeof(int) * size); 538 M->pe = (int *)malloc(sizeof(int) * n); 539 M->block_sizes = (int *)malloc(sizeof(int) * n); 540 for (i = 0; i < n; i++) M->block_sizes[i] = 1; 541 542 PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm)); 543 544 M->start_indices[0] = 0; 545 for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1]; 546 547 M->mnl = M->mnls[M->myid]; 548 M->my_start_index = M->start_indices[M->myid]; 549 550 for (i = 0; i < size; i++) { 551 start_indx = M->start_indices[i]; 552 for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i; 553 } 554 555 if (AT) { 556 M->lines = new_compressed_lines(M->mnls[rank], 1); 557 } else { 558 M->lines = new_compressed_lines(M->mnls[rank], 0); 559 } 560 561 rows = M->lines; 562 563 /* Determine the mapping from global indices to pointers */ 564 PetscCall(PetscMalloc1(M->n, &mapping)); 565 pe = 0; 566 local_indx = 0; 567 for (i = 0; i < M->n; i++) { 568 if (local_indx >= M->mnls[pe]) { 569 pe++; 570 local_indx = 0; 571 } 572 mapping[i] = local_indx + M->start_indices[pe]; 573 local_indx++; 574 } 575 576 /************** Set up the row structure *****************/ 577 578 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 579 for (i = rstart; i < rend; i++) { 580 row_indx = i - rstart; 581 PetscCall(MatGetRow(A, i, &nz, &cols, &vals)); 582 /* allocate buffers */ 583 rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 584 rows->A[row_indx] = (double *)malloc(nz * sizeof(double)); 585 /* copy the matrix */ 586 for (j = 0; j < nz; j++) { 587 col = cols[j]; 588 len = rows->len[row_indx]++; 589 590 rows->ptrs[row_indx][len] = mapping[col]; 591 rows->A[row_indx][len] = vals[j]; 592 } 593 rows->slen[row_indx] = 
rows->len[row_indx]; 594 595 PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals)); 596 } 597 598 /************** Set up the column structure *****************/ 599 600 if (AT) { 601 for (i = rstart; i < rend; i++) { 602 row_indx = i - rstart; 603 PetscCall(MatGetRow(AT, i, &nz, &cols, &vals)); 604 /* allocate buffers */ 605 rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int)); 606 /* copy the matrix (i.e., the structure) */ 607 for (j = 0; j < nz; j++) { 608 col = cols[j]; 609 len = rows->rlen[row_indx]++; 610 611 rows->rptrs[row_indx][len] = mapping[col]; 612 } 613 PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals)); 614 } 615 } 616 617 PetscCall(PetscFree(mapping)); 618 619 order_pointers(M); 620 M->maxnz = calc_maxnz(M); 621 *B = M; 622 PetscFunctionReturn(0); 623 } 624 625 /* 626 Converts from an SPAI matrix B to a PETSc matrix PB. 627 This assumes that the SPAI matrix B is stored in 628 COMPRESSED-ROW format. 629 */ 630 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB) { 631 PetscMPIInt size, rank; 632 int m, n, M, N; 633 int d_nz, o_nz; 634 int *d_nnz, *o_nnz; 635 int i, k, global_row, global_col, first_diag_col, last_diag_col; 636 PetscScalar val; 637 638 PetscFunctionBegin; 639 PetscCallMPI(MPI_Comm_size(comm, &size)); 640 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 641 642 m = n = B->mnls[rank]; 643 d_nz = o_nz = 0; 644 645 /* Determine preallocation for MatCreateAIJ */ 646 PetscCall(PetscMalloc1(m, &d_nnz)); 647 PetscCall(PetscMalloc1(m, &o_nnz)); 648 for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0; 649 first_diag_col = B->start_indices[rank]; 650 last_diag_col = first_diag_col + B->mnls[rank]; 651 for (i = 0; i < B->mnls[rank]; i++) { 652 for (k = 0; k < B->lines->len[i]; k++) { 653 global_col = B->lines->ptrs[i][k]; 654 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 655 else o_nnz[i]++; 656 } 657 } 658 659 M = N = B->n; 660 /* Here we only know how to create AIJ format */ 661 PetscCall(MatCreate(comm, 
PB)); 662 PetscCall(MatSetSizes(*PB, m, n, M, N)); 663 PetscCall(MatSetType(*PB, MATAIJ)); 664 PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz)); 665 PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz)); 666 667 for (i = 0; i < B->mnls[rank]; i++) { 668 global_row = B->start_indices[rank] + i; 669 for (k = 0; k < B->lines->len[i]; k++) { 670 global_col = B->lines->ptrs[i][k]; 671 672 val = B->lines->A[i][k]; 673 PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES)); 674 } 675 } 676 677 PetscCall(PetscFree(d_nnz)); 678 PetscCall(PetscFree(o_nnz)); 679 680 PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY)); 681 PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY)); 682 PetscFunctionReturn(0); 683 } 684 685 /* 686 Converts from an SPAI vector v to a PETSc vec Pv. 687 */ 688 PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv) { 689 PetscMPIInt size, rank; 690 int m, M, i, *mnls, *start_indices, *global_indices; 691 692 PetscFunctionBegin; 693 PetscCallMPI(MPI_Comm_size(comm, &size)); 694 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 695 696 m = v->mnl; 697 M = v->n; 698 699 PetscCall(VecCreateMPI(comm, m, M, Pv)); 700 701 PetscCall(PetscMalloc1(size, &mnls)); 702 PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm)); 703 704 PetscCall(PetscMalloc1(size, &start_indices)); 705 706 start_indices[0] = 0; 707 for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1]; 708 709 PetscCall(PetscMalloc1(v->mnl, &global_indices)); 710 for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i; 711 712 PetscCall(PetscFree(mnls)); 713 PetscCall(PetscFree(start_indices)); 714 715 PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES)); 716 PetscCall(VecAssemblyBegin(*Pv)); 717 PetscCall(VecAssemblyEnd(*Pv)); 718 719 PetscCall(PetscFree(global_indices)); 720 PetscFunctionReturn(0); 721 } 722