1 /*
2 3/99 Modified by Stephen Barnard to support SPAI version 3.0
3 */
4
5 /*
6 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
7 Code written by Stephen Barnard.
8
9 Note: there is some BAD memory bleeding below!
10
11 This code needs work
12
13 1) get rid of all memory bleeding
14 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
15 rather than having the sp flag for PC_SPAI
16 3) fix to set the block size based on the matrix block size
17
18 */
19 #if !defined(PETSC_SKIP_COMPLEX)
20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 #endif
22
23 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
24 #include <../src/ksp/pc/impls/spai/petscspai.h>
25
26 /*
27 These are the SPAI include files
28 */
29 EXTERN_C_BEGIN
30 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
31 #include <spai.h>
32 #include <matrix.h>
33 EXTERN_C_END
34
35 static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
36 static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
37
/*
   Private context for the SPAI preconditioner.

   Holds the SPAI-format copies of the operator used during setup, the
   resulting approximate inverse in PETSc format, and the SPAI tuning
   parameters (see the PCSPAISetXXX man pages for their meaning).
*/
typedef struct {
  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format; NOTE(review): never assigned or read in this file — candidate for removal */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM; /* the approximate inverse PETSc format */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;
56
PCSetUp_SPAI(PC pc)57 static PetscErrorCode PCSetUp_SPAI(PC pc)
58 {
59 PC_SPAI *ispai = (PC_SPAI *)pc->data;
60 Mat AT;
61
62 PetscFunctionBegin;
63 init_SPAI();
64
65 if (ispai->sp) {
66 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
67 } else {
68 /* Use the transpose to get the column nonzero structure. */
69 PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
70 PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
71 PetscCall(MatDestroy(&AT));
72 }
73
74 /* Destroy the transpose */
75 /* Don't know how to do it. PETSc developers? */
76
77 /* construct SPAI preconditioner */
78 /* FILE *messages */ /* file for warning messages */
79 /* double epsilon */ /* tolerance */
80 /* int nbsteps */ /* max number of "improvement" steps per line */
81 /* int max */ /* max dimensions of is_I, q, etc. */
82 /* int maxnew */ /* max number of new entries per step */
83 /* int block_size */ /* block_size == 1 specifies scalar elements
84 block_size == n specifies nxn constant-block elements
85 block_size == 0 specifies variable-block elements */
86 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
87 /* int verbose */ /* verbose == 0 specifies that SPAI is silent
88 verbose == 1 prints timing and matrix statistics */
89
90 PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);
91
92 PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));
93
94 /* free the SPAI matrices */
95 sp_free_matrix(ispai->B);
96 sp_free_matrix(ispai->M);
97 PetscFunctionReturn(PETSC_SUCCESS);
98 }
99
PCApply_SPAI(PC pc,Vec xx,Vec y)100 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
101 {
102 PC_SPAI *ispai = (PC_SPAI *)pc->data;
103
104 PetscFunctionBegin;
105 /* Now using PETSc's multiply */
106 PetscCall(MatMult(ispai->PM, xx, y));
107 PetscFunctionReturn(PETSC_SUCCESS);
108 }
109
PCMatApply_SPAI(PC pc,Mat X,Mat Y)110 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
111 {
112 PC_SPAI *ispai = (PC_SPAI *)pc->data;
113
114 PetscFunctionBegin;
115 /* Now using PETSc's multiply */
116 PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
117 PetscFunctionReturn(PETSC_SUCCESS);
118 }
119
PCDestroy_SPAI(PC pc)120 static PetscErrorCode PCDestroy_SPAI(PC pc)
121 {
122 PC_SPAI *ispai = (PC_SPAI *)pc->data;
123
124 PetscFunctionBegin;
125 PetscCall(MatDestroy(&ispai->PM));
126 PetscCallMPI(MPI_Comm_free(&ispai->comm_spai));
127 PetscCall(PetscFree(pc->data));
128 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
129 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
130 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
131 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
132 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
133 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
134 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
135 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
136 PetscFunctionReturn(PETSC_SUCCESS);
137 }
138
PCView_SPAI(PC pc,PetscViewer viewer)139 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
140 {
141 PC_SPAI *ispai = (PC_SPAI *)pc->data;
142 PetscBool isascii;
143
144 PetscFunctionBegin;
145 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
146 if (isascii) {
147 PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon));
148 PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps));
149 PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max));
150 PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew));
151 PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size));
152 PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size));
153 PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose));
154 PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp));
155 }
156 PetscFunctionReturn(PETSC_SUCCESS);
157 }
158
PCSPAISetEpsilon_SPAI(PC pc,PetscReal epsilon1)159 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
160 {
161 PC_SPAI *ispai = (PC_SPAI *)pc->data;
162
163 PetscFunctionBegin;
164 ispai->epsilon = (double)epsilon1;
165 PetscFunctionReturn(PETSC_SUCCESS);
166 }
167
PCSPAISetNBSteps_SPAI(PC pc,PetscInt nbsteps1)168 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
169 {
170 PC_SPAI *ispai = (PC_SPAI *)pc->data;
171
172 PetscFunctionBegin;
173 ispai->nbsteps = (int)nbsteps1;
174 PetscFunctionReturn(PETSC_SUCCESS);
175 }
176
177 /* added 1/7/99 g.h. */
PCSPAISetMax_SPAI(PC pc,PetscInt max1)178 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
179 {
180 PC_SPAI *ispai = (PC_SPAI *)pc->data;
181
182 PetscFunctionBegin;
183 ispai->max = (int)max1;
184 PetscFunctionReturn(PETSC_SUCCESS);
185 }
186
PCSPAISetMaxNew_SPAI(PC pc,PetscInt maxnew1)187 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
188 {
189 PC_SPAI *ispai = (PC_SPAI *)pc->data;
190
191 PetscFunctionBegin;
192 ispai->maxnew = (int)maxnew1;
193 PetscFunctionReturn(PETSC_SUCCESS);
194 }
195
PCSPAISetBlockSize_SPAI(PC pc,PetscInt block_size1)196 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
197 {
198 PC_SPAI *ispai = (PC_SPAI *)pc->data;
199
200 PetscFunctionBegin;
201 ispai->block_size = (int)block_size1;
202 PetscFunctionReturn(PETSC_SUCCESS);
203 }
204
PCSPAISetCacheSize_SPAI(PC pc,PetscInt cache_size)205 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
206 {
207 PC_SPAI *ispai = (PC_SPAI *)pc->data;
208
209 PetscFunctionBegin;
210 ispai->cache_size = (int)cache_size;
211 PetscFunctionReturn(PETSC_SUCCESS);
212 }
213
PCSPAISetVerbose_SPAI(PC pc,PetscInt verbose)214 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
215 {
216 PC_SPAI *ispai = (PC_SPAI *)pc->data;
217
218 PetscFunctionBegin;
219 ispai->verbose = (int)verbose;
220 PetscFunctionReturn(PETSC_SUCCESS);
221 }
222
PCSPAISetSp_SPAI(PC pc,PetscInt sp)223 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
224 {
225 PC_SPAI *ispai = (PC_SPAI *)pc->data;
226
227 PetscFunctionBegin;
228 ispai->sp = (int)sp;
229 PetscFunctionReturn(PETSC_SUCCESS);
230 }
231
232 /*@
233 PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner
234
235 Input Parameters:
236 + pc - the preconditioner
237 - epsilon1 - the tolerance (default .4)
238
239 Level: intermediate
240
241 Note:
  `epsilon1` must be between 0 and 1. It controls the
243 quality of the approximation of M to the inverse of
244 A. Higher values of `epsilon1` lead to more work, more
245 fill, and usually better preconditioners. In many
246 cases the best choice of `epsilon1` is the one that
247 divides the total solution time equally between the
248 preconditioner and the solver.
249
250 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
251 @*/
PCSPAISetEpsilon(PC pc,PetscReal epsilon1)252 PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
253 {
254 PetscFunctionBegin;
255 PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
256 PetscFunctionReturn(PETSC_SUCCESS);
257 }
258
259 /*@
260 PCSPAISetNBSteps - set maximum number of improvement steps per row in
261 the `PCSPAI` preconditioner
262
263 Input Parameters:
264 + pc - the preconditioner
265 - nbsteps1 - number of steps (default 5)
266
267 Note:
  `PCSPAI` constructs an approximation to every column of
269 the exact inverse of A in a series of improvement
270 steps. The quality of the approximation is determined
271 by epsilon. If an approximation achieving an accuracy
272 of epsilon is not obtained after `nbsteps1` steps, `PCSPAI` simply
273 uses the best approximation constructed so far.
274
275 Level: intermediate
276
277 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
278 @*/
PCSPAISetNBSteps(PC pc,PetscInt nbsteps1)279 PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
280 {
281 PetscFunctionBegin;
282 PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
283 PetscFunctionReturn(PETSC_SUCCESS);
284 }
285
286 /* added 1/7/99 g.h. */
287 /*@
288 PCSPAISetMax - set the size of various working buffers in the `PCSPAI` preconditioner
289
290 Input Parameters:
291 + pc - the preconditioner
292 - max1 - size (default is 5000)
293
294 Level: intermediate
295
296 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
297 @*/
PCSPAISetMax(PC pc,PetscInt max1)298 PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
299 {
300 PetscFunctionBegin;
301 PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
302 PetscFunctionReturn(PETSC_SUCCESS);
303 }
304
305 /*@
306 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step in the `PCSPAI` preconditioner
307
308 Input Parameters:
309 + pc - the preconditioner
310 - maxnew1 - maximum number (default 5)
311
312 Level: intermediate
313
314 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
315 @*/
PCSPAISetMaxNew(PC pc,PetscInt maxnew1)316 PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
317 {
318 PetscFunctionBegin;
319 PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
320 PetscFunctionReturn(PETSC_SUCCESS);
321 }
322
323 /*@
324 PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner
325
326 Input Parameters:
327 + pc - the preconditioner
328 - block_size1 - block size (default 1)
329
330 Level: intermediate
331
332 Notes:
333 A block
334 size of 1 treats A as a matrix of scalar elements. A
335 block size of s > 1 treats A as a matrix of sxs
336 blocks. A block size of 0 treats A as a matrix with
337 variable sized blocks, which are determined by
338 searching for dense square diagonal blocks in A.
339 This can be very effective for finite-element
340 matrices.
341
342 SPAI will convert A to block form, use a block
343 version of the preconditioner algorithm, and then
344 convert the result back to scalar form.
345
  In many cases a block-size parameter other than 1
347 can lead to very significant improvement in
348 performance.
349
350 Developer Note:
351 This preconditioner could use the matrix block size as the default block size to use
352
353 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
354 @*/
PCSPAISetBlockSize(PC pc,PetscInt block_size1)355 PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
356 {
357 PetscFunctionBegin;
358 PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
359 PetscFunctionReturn(PETSC_SUCCESS);
360 }
361
362 /*@
363 PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner
364
365 Input Parameters:
366 + pc - the preconditioner
367 - cache_size - cache size {0,1,2,3,4,5} (default 5)
368
369 Level: intermediate
370
371 Note:
372 `PCSPAI` uses a hash table to cache messages and avoid
  redundant communication. We suggest always using
374 5. This parameter is irrelevant in the serial
375 version.
376
377 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
378 @*/
PCSPAISetCacheSize(PC pc,PetscInt cache_size)379 PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
380 {
381 PetscFunctionBegin;
382 PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
383 PetscFunctionReturn(PETSC_SUCCESS);
384 }
385
386 /*@
387 PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner
388
389 Input Parameters:
390 + pc - the preconditioner
391 - verbose - level (default 1)
392
393 Level: intermediate
394
395 Note:
396 Prints parameters, timings and matrix statistics
397
398 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
399 @*/
PCSPAISetVerbose(PC pc,PetscInt verbose)400 PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
401 {
402 PetscFunctionBegin;
403 PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
404 PetscFunctionReturn(PETSC_SUCCESS);
405 }
406
407 /*@
408 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner
409
410 Input Parameters:
411 + pc - the preconditioner
412 - sp - 0 or 1
413
414 Level: intermediate
415
416 Note:
417 If A has a symmetric nonzero pattern use `sp` 1 to
418 improve performance by eliminating some communication
419 in the parallel version. Even if A does not have a
420 symmetric nonzero pattern `sp` 1 may well lead to good
421 results, but the code will not follow the published
422 SPAI algorithm exactly.
423
424 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
425 @*/
PCSPAISetSp(PC pc,PetscInt sp)426 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
427 {
428 PetscFunctionBegin;
429 PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
430 PetscFunctionReturn(PETSC_SUCCESS);
431 }
432
PCSetFromOptions_SPAI(PC pc,PetscOptionItems PetscOptionsObject)433 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems PetscOptionsObject)
434 {
435 PC_SPAI *ispai = (PC_SPAI *)pc->data;
436 int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
437 double epsilon1;
438 PetscBool flg;
439
440 PetscFunctionBegin;
441 PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
442 PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
443 if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
444 PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
445 if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
446 /* added 1/7/99 g.h. */
447 PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
448 if (flg) PetscCall(PCSPAISetMax(pc, max1));
449 PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
450 if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
451 PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
452 if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
453 PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
454 if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
455 PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
456 if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
457 PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
458 if (flg) PetscCall(PCSPAISetSp(pc, sp));
459 PetscOptionsHeadEnd();
460 PetscFunctionReturn(PETSC_SUCCESS);
461 }
462
463 /*MC
464 PCSPAI - Use the Sparse Approximate Inverse method {cite}`gh97`
465
466 Options Database Keys:
+ -pc_spai_epsilon <eps> - set tolerance
. -pc_spai_nbsteps <n> - set nbsteps
. -pc_spai_max <m> - set max
. -pc_spai_maxnew <m> - set maxnew
. -pc_spai_block_size <n> - set block size
. -pc_spai_cache_size <n> - set cache size
. -pc_spai_sp <m> - set sp
- -pc_spai_verbose <n> - verbose output
475
476 Level: beginner
477
478 Note:
479 This only works with `MATAIJ` matrices.
480
481 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
482 `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
483 `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
484 M*/
485
PCCreate_SPAI(PC pc)486 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
487 {
488 PC_SPAI *ispai;
489
490 PetscFunctionBegin;
491 PetscCall(PetscNew(&ispai));
492 pc->data = ispai;
493
494 pc->ops->destroy = PCDestroy_SPAI;
495 pc->ops->apply = PCApply_SPAI;
496 pc->ops->matapply = PCMatApply_SPAI;
497 pc->ops->applyrichardson = 0;
498 pc->ops->setup = PCSetUp_SPAI;
499 pc->ops->view = PCView_SPAI;
500 pc->ops->setfromoptions = PCSetFromOptions_SPAI;
501
502 ispai->epsilon = .4;
503 ispai->nbsteps = 5;
504 ispai->max = 5000;
505 ispai->maxnew = 5;
506 ispai->block_size = 1;
507 ispai->cache_size = 5;
508 ispai->verbose = 0;
509
510 ispai->sp = 1;
511 PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &ispai->comm_spai));
512
513 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
514 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
515 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
516 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
517 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
518 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
519 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
520 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
521 PetscFunctionReturn(PETSC_SUCCESS);
522 }
523
524 /*
525 Converts from a PETSc matrix to an SPAI matrix
526 */
/*
   ConvertMatToMatrix - converts a PETSc Mat into SPAI's "matrix" structure.

   Input:
     comm - communicator the SPAI matrix lives on
     A    - the PETSc matrix
     AT   - transpose of A, supplying the column nonzero structure
            (may equal A when the nonzero pattern is symmetric)
   Output:
     B    - newly allocated SPAI matrix; caller frees with sp_free_matrix()

   NOTE(review): the malloc() results below are not checked; SPAI's own
   allocation style is followed here, but a failure would dereference NULL.
*/
static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
{
  matrix                  *M;
  int                      i, j, col;
  int                      row_indx;
  int                      len, pe, local_indx, start_indx;
  int                     *mapping;
  const int               *cols;
  const double            *vals;
  int                      n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt              size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required. commenting out
    PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  /* scalar (non-blocked) storage: every block is 1x1 */
  M->n              = n;
  M->bs             = 1;
  M->max_block_size = 1;

  M->mnls          = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe            = (int *)malloc(sizeof(int) * n);
  M->block_sizes   = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  /* gather every rank's local row count so all ranks know the global layout */
  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  /* prefix-sum the row counts to get each rank's first global row */
  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* record, for every global row, which rank owns it */
  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  /* the second flag requests storage for the column (transpose) structure */
  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe         = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    /* advance to the next owning rank once its rows are exhausted */
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /************** Set up the row structure *****************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart; /* local row index */
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************** Set up the column structure *****************/

  /* rows of AT give the column nonzero structure of A; only indices are needed */
  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(PETSC_SUCCESS);
}
645
646 /*
647 Converts from an SPAI matrix B to a PETSc matrix PB.
648 This assumes that the SPAI matrix B is stored in
649 COMPRESSED-ROW format.
650 */
static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
{
  PetscMPIInt size, rank;
  int         m, n, M, N;
  int         d_nz, o_nz;
  int        *d_nnz, *o_nnz; /* NOTE(review): MatSeqAIJSetPreallocation expects PetscInt*; with 64-bit PetscInt this int* would mismatch — confirm build configuration */
  int         i, k, global_row, global_col, first_diag_col, last_diag_col;
  PetscScalar val;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* local dimensions come from this rank's row count in the SPAI layout */
  m = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateAIJ */
  PetscCall(PetscMalloc1(m, &d_nnz));
  PetscCall(PetscMalloc1(m, &o_nnz));
  for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
  /* columns inside [first_diag_col, last_diag_col) land in the diagonal block */
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i = 0; i < B->mnls[rank]; i++) {
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  PetscCall(MatCreate(comm, PB));
  PetscCall(MatSetSizes(*PB, m, n, M, N));
  PetscCall(MatSetType(*PB, MATAIJ));
  /* both preallocations are set; only the one matching the actual type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));

  /* copy the compressed-row SPAI entries into the PETSc matrix */
  for (i = 0; i < B->mnls[rank]; i++) {
    global_row = B->start_indices[rank] + i;
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val = B->lines->A[i][k];
      PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
    }
  }
  PetscCall(PetscFree(d_nnz));
  PetscCall(PetscFree(o_nnz));

  PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
705