xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision bcda9346efad4e5ba2d553af84eb238771ba1e25)
1 /*
2    3/99 Modified by Stephen Barnard to support SPAI version 3.0
3 */
4 
5 /*
6       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
7    Code written by Stephen Barnard.
8 
9       Note: there is some BAD memory bleeding below!
10 
11       This code needs work
12 
13    1) get rid of all memory bleeding
14    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
15       rather than having the sp flag for PC_SPAI
16    3) fix to set the block size based on the matrix block size
17 
18 */
19 #if !defined(PETSC_SKIP_COMPLEX)
20   #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 #endif
22 
23 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
24 #include <../src/ksp/pc/impls/spai/petscspai.h>
25 
26 /*
27     These are the SPAI include files
28 */
29 EXTERN_C_BEGIN
30 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
31 #include <spai.h>
32 #include <matrix.h>
33 EXTERN_C_END
34 
35 static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
36 static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
37 
/* Private context for the SPAI sparse-approximate-inverse preconditioner */
typedef struct {
  matrix *B;  /* the system matrix converted to SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format */
  matrix *M;  /* the approximate inverse computed by SPAI, in SPAI format */

  Mat PM; /* the approximate inverse in PETSc format; applied in PCApply_SPAI() */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;
56 
/*
   PCSetUp_SPAI - Builds the SPAI approximate inverse of pc->pmat and converts it
   back to PETSc format (stored in ispai->PM) for use by PCApply_SPAI().
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;
  Mat      AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the row structure also provides the column structure */
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
  } else {
    /* Use the transpose to get the column nonzero structure. The transpose is
       destroyed immediately after conversion; only ispai->B keeps the data. */
    PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
    PetscCall(MatDestroy(&AT));
  }

  /* construct SPAI preconditioner; the bspai() arguments are: */
  /* FILE *messages */    /* file for warning messages */
  /* double epsilon */    /* tolerance */
  /* int nbsteps */       /* max number of "improvement" steps per line */
  /* int max */           /* max dimensions of is_I, q, etc. */
  /* int maxnew */        /* max number of new entries per step */
  /* int block_size */    /* block_size == 1 specifies scalar elements
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */    /* one of (1,2,3,4,5,6) indicating size of cache. cache_size == 0 indicates no caching */
  /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);

  PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));

  /* free the SPAI matrices; only the PETSc-format copy ispai->PM is kept */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(PETSC_SUCCESS);
}
99 
PCApply_SPAI(PC pc,Vec xx,Vec y)100 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
101 {
102   PC_SPAI *ispai = (PC_SPAI *)pc->data;
103 
104   PetscFunctionBegin;
105   /* Now using PETSc's multiply */
106   PetscCall(MatMult(ispai->PM, xx, y));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
PCMatApply_SPAI(PC pc,Mat X,Mat Y)110 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
111 {
112   PC_SPAI *ispai = (PC_SPAI *)pc->data;
113 
114   PetscFunctionBegin;
115   /* Now using PETSc's multiply */
116   PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
PCDestroy_SPAI(PC pc)120 static PetscErrorCode PCDestroy_SPAI(PC pc)
121 {
122   PC_SPAI *ispai = (PC_SPAI *)pc->data;
123 
124   PetscFunctionBegin;
125   PetscCall(MatDestroy(&ispai->PM));
126   PetscCallMPI(MPI_Comm_free(&ispai->comm_spai));
127   PetscCall(PetscFree(pc->data));
128   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
129   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
130   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
131   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
132   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
133   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
134   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
135   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
136   PetscFunctionReturn(PETSC_SUCCESS);
137 }
138 
PCView_SPAI(PC pc,PetscViewer viewer)139 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
140 {
141   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
142   PetscBool isascii;
143 
144   PetscFunctionBegin;
145   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
146   if (isascii) {
147     PetscCall(PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon));
148     PetscCall(PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps));
149     PetscCall(PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max));
150     PetscCall(PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew));
151     PetscCall(PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size));
152     PetscCall(PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size));
153     PetscCall(PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose));
154     PetscCall(PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp));
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
PCSPAISetEpsilon_SPAI(PC pc,PetscReal epsilon1)159 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
160 {
161   PC_SPAI *ispai = (PC_SPAI *)pc->data;
162 
163   PetscFunctionBegin;
164   ispai->epsilon = (double)epsilon1;
165   PetscFunctionReturn(PETSC_SUCCESS);
166 }
167 
PCSPAISetNBSteps_SPAI(PC pc,PetscInt nbsteps1)168 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
169 {
170   PC_SPAI *ispai = (PC_SPAI *)pc->data;
171 
172   PetscFunctionBegin;
173   ispai->nbsteps = (int)nbsteps1;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 /* added 1/7/99 g.h. */
PCSPAISetMax_SPAI(PC pc,PetscInt max1)178 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
179 {
180   PC_SPAI *ispai = (PC_SPAI *)pc->data;
181 
182   PetscFunctionBegin;
183   ispai->max = (int)max1;
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
PCSPAISetMaxNew_SPAI(PC pc,PetscInt maxnew1)187 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
188 {
189   PC_SPAI *ispai = (PC_SPAI *)pc->data;
190 
191   PetscFunctionBegin;
192   ispai->maxnew = (int)maxnew1;
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
PCSPAISetBlockSize_SPAI(PC pc,PetscInt block_size1)196 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
197 {
198   PC_SPAI *ispai = (PC_SPAI *)pc->data;
199 
200   PetscFunctionBegin;
201   ispai->block_size = (int)block_size1;
202   PetscFunctionReturn(PETSC_SUCCESS);
203 }
204 
PCSPAISetCacheSize_SPAI(PC pc,PetscInt cache_size)205 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
206 {
207   PC_SPAI *ispai = (PC_SPAI *)pc->data;
208 
209   PetscFunctionBegin;
210   ispai->cache_size = (int)cache_size;
211   PetscFunctionReturn(PETSC_SUCCESS);
212 }
213 
PCSPAISetVerbose_SPAI(PC pc,PetscInt verbose)214 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
215 {
216   PC_SPAI *ispai = (PC_SPAI *)pc->data;
217 
218   PetscFunctionBegin;
219   ispai->verbose = (int)verbose;
220   PetscFunctionReturn(PETSC_SUCCESS);
221 }
222 
PCSPAISetSp_SPAI(PC pc,PetscInt sp)223 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
224 {
225   PC_SPAI *ispai = (PC_SPAI *)pc->data;
226 
227   PetscFunctionBegin;
228   ispai->sp = (int)sp;
229   PetscFunctionReturn(PETSC_SUCCESS);
230 }
231 
/*@
  PCSPAISetEpsilon - Set the tolerance for the `PCSPAI` preconditioner

  Input Parameters:
+ pc       - the preconditioner
- epsilon1 - the tolerance (default .4)

  Level: intermediate

  Note:
  `epsilon1` must be between 0 and 1. It controls the
  quality of the approximation of M to the inverse of
  A. Higher values of `epsilon1` lead to more work, more
  fill, and usually better preconditioners. In many
  cases the best choice of `epsilon1` is the one that
  divides the total solution time equally between the
  preconditioner and the solver.

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
258 
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
  the `PCSPAI` preconditioner

  Input Parameters:
+ pc       - the preconditioner
- nbsteps1 - number of steps (default 5)

  Note:
  `PCSPAI` constructs an approximation to every column of
  the exact inverse of A in a series of improvement
  steps. The quality of the approximation is determined
  by epsilon. If an approximation achieving an accuracy
  of epsilon is not obtained after `nbsteps1` steps, `PCSPAI` simply
  uses the best approximation constructed so far.

  Level: intermediate

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
285 
286 /* added 1/7/99 g.h. */
/*@
  PCSPAISetMax - set the size of various working buffers in the `PCSPAI` preconditioner

  Input Parameters:
+ pc   - the preconditioner
- max1 - size (default is 5000)

  Level: intermediate

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
304 
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step in the `PCSPAI` preconditioner

  Input Parameters:
+ pc      - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
322 
/*@
  PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner

  Input Parameters:
+ pc          - the preconditioner
- block_size1 - block size (default 1)

  Level: intermediate

  Notes:
  A block size of 1 treats A as a matrix of scalar elements. A
  block size of s > 1 treats A as a matrix of sxs
  blocks. A block size of 0 treats A as a matrix with
  variable sized blocks, which are determined by
  searching for dense square diagonal blocks in A.
  This can be very effective for finite-element
  matrices.

  SPAI will convert A to block form, use a block
  version of the preconditioner algorithm, and then
  convert the result back to scalar form.

  In many cases a block-size parameter other than 1
  can lead to very significant improvement in
  performance.

  Developer Note:
  This preconditioner could use the matrix block size as the default block size to use

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
361 
/*@
  PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner

  Input Parameters:
+ pc         - the preconditioner
- cache_size - cache size {0,1,2,3,4,5} (default 5)

  Level: intermediate

  Note:
  `PCSPAI` uses a hash table to cache messages and avoid
  redundant communication. We suggest always using
  5. This parameter is irrelevant in the serial
  version.

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
/*@
  PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner

  Input Parameters:
+ pc      - the preconditioner
- verbose - level (default 1)

  Level: intermediate

  Note:
  Prints parameters, timings and matrix statistics

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
  PetscFunctionReturn(PETSC_SUCCESS);
}
406 
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Level: intermediate

  Note:
  If A has a symmetric nonzero pattern use `sp` 1 to
  improve performance by eliminating some communication
  in the parallel version. Even if A does not have a
  symmetric nonzero pattern `sp` 1 may well lead to good
  results, but the code will not follow the published
  SPAI algorithm exactly.

.seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
432 
/*
   PCSetFromOptions_SPAI - Queries the options database for every SPAI parameter;
   each setter is invoked only when its option was actually given (flg), so
   PCSPAISetXXX() calls made by the user beforehand are not overwritten.
*/
static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems PetscOptionsObject)
{
  PC_SPAI  *ispai = (PC_SPAI *)pc->data;
  int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
  double    epsilon1;
  PetscBool flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
  PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
  if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
  PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
  if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
  /* added 1/7/99 g.h. */
  PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
  if (flg) PetscCall(PCSPAISetMax(pc, max1));
  PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
  if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
  PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
  if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
  PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
  if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
  PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
  if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
  PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
  if (flg) PetscCall(PCSPAISetSp(pc, sp));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
462 
463 /*MC
464    PCSPAI - Use the Sparse Approximate Inverse method {cite}`gh97`
465 
466    Options Database Keys:
+  -pc_spai_epsilon <eps>    - set tolerance
.  -pc_spai_nbsteps <n>      - set nbsteps
.  -pc_spai_max <m>          - set max
.  -pc_spai_maxnew <m>       - set maxnew
.  -pc_spai_block_size <n>   - set block size
.  -pc_spai_cache_size <n>   - set cache size
.  -pc_spai_sp <m>           - set sp
-  -pc_spai_verbose <n>      - set verbosity level
475 
476    Level: beginner
477 
478    Note:
479     This only works with `MATAIJ` matrices.
480 
481 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
482           `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
483           `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
484 M*/
485 
PCCreate_SPAI(PC pc)486 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
487 {
488   PC_SPAI *ispai;
489 
490   PetscFunctionBegin;
491   PetscCall(PetscNew(&ispai));
492   pc->data = ispai;
493 
494   pc->ops->destroy         = PCDestroy_SPAI;
495   pc->ops->apply           = PCApply_SPAI;
496   pc->ops->matapply        = PCMatApply_SPAI;
497   pc->ops->applyrichardson = 0;
498   pc->ops->setup           = PCSetUp_SPAI;
499   pc->ops->view            = PCView_SPAI;
500   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
501 
502   ispai->epsilon    = .4;
503   ispai->nbsteps    = 5;
504   ispai->max        = 5000;
505   ispai->maxnew     = 5;
506   ispai->block_size = 1;
507   ispai->cache_size = 5;
508   ispai->verbose    = 0;
509 
510   ispai->sp = 1;
511   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &ispai->comm_spai));
512 
513   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
514   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
515   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
516   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
517   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
518   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
519   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
520   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
521   PetscFunctionReturn(PETSC_SUCCESS);
522 }
523 
524 /*
525    Converts from a PETSc matrix to an SPAI matrix
526 */
/*
   ConvertMatToMatrix - Converts a PETSc matrix to SPAI's internal "matrix" format.

   comm - communicator for the SPAI matrix
   A    - the matrix supplying the values and row nonzero structure
   AT   - matrix whose rows give the COLUMN nonzero structure of A (pass A itself
          when the pattern is symmetric); may be NULL, in which case no column
          structure is recorded
   B    - the resulting SPAI-format matrix

   The SPAI structure is filled with malloc()ed buffers so that sp_free_matrix()
   (called in PCSetUp_SPAI()) can release them; the malloc() results are not
   checked here. NOTE(review): unchecked malloc() -- failure would crash later.
*/
static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
{
  matrix                  *M;
  int                      i, j, col;
  int                      row_indx;
  int                      len, pe, local_indx, start_indx;
  int                     *mapping;
  const int               *cols;
  const double            *vals;
  int                      n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt              size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required. commenting out
  PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  /* global dimension; scalar elements only (block size 1) */
  M->n              = n;
  M->bs             = 1;
  M->max_block_size = 1;

  M->mnls          = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe            = (int *)malloc(sizeof(int) * n);
  M->block_sizes   = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  /* gather the number of local rows owned by every process */
  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  /* first global row index owned by each process */
  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  /* M->myid is presumably initialized by new_matrix() from the communicator -- TODO confirm */
  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* record the owning process of every global row */
  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1); /* with column (transpose) structure */
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe         = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /************** Set up the row structure *****************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart; /* local row index */
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************** Set up the column structure *****************/

  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure; the values of AT are not needed) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(PETSC_SUCCESS);
}
645 
646 /*
647    Converts from an SPAI matrix B  to a PETSc matrix PB.
648    This assumes that the SPAI matrix B is stored in
649    COMPRESSED-ROW format.
650 */
/*
   ConvertMatrixToMat - Converts an SPAI matrix B (assumed to be stored in
   COMPRESSED-ROW format) into a PETSc AIJ matrix PB.

   comm - communicator for the new PETSc matrix
   B    - the SPAI matrix
   PB   - the resulting PETSc matrix (created here; caller destroys)
*/
static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
{
  PetscMPIInt size, rank;
  int         m, n, M, N;
  int         d_nz, o_nz;
  int        *d_nnz, *o_nnz;
  int         i, k, global_row, global_col, first_diag_col, last_diag_col;
  PetscScalar val;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* local row/column counts come from SPAI's per-process row counts */
  m = n = B->mnls[rank];
  d_nz = o_nz = 0; /* ignored when the per-row nnz arrays below are supplied */

  /* Determine preallocation for MatCreateAIJ: count entries inside and outside
     the local diagonal block for every local row */
  PetscCall(PetscMalloc1(m, &d_nnz));
  PetscCall(PetscMalloc1(m, &o_nnz));
  for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i = 0; i < B->mnls[rank]; i++) {
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  PetscCall(MatCreate(comm, PB));
  PetscCall(MatSetSizes(*PB, m, n, M, N));
  PetscCall(MatSetType(*PB, MATAIJ));
  /* both preallocations are set; only the one matching the actual type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));

  /* copy values entry by entry; all rows inserted here are locally owned */
  for (i = 0; i < B->mnls[rank]; i++) {
    global_row = B->start_indices[rank] + i;
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val = B->lines->A[i][k];
      PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
    }
  }
  PetscCall(PetscFree(d_nnz));
  PetscCall(PetscFree(o_nnz));

  PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
705