xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision c87f018dd60dd7cbbffff5f1ec4e5d075ae0fc6f)
1 /*
2    3/99 Modified by Stephen Barnard to support SPAI version 3.0
3 */
4 
5 /*
6       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
7    Code written by Stephen Barnard.
8 
9       Note: there is some BAD memory bleeding below!
10 
11       This code needs work
12 
13    1) get rid of all memory bleeding
14    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
15       rather than having the sp flag for PC_SPAI
16    3) fix to set the block size based on the matrix block size
17 
18 */
19 #if !defined(PETSC_SKIP_COMPLEX)
20   #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 #endif
22 
23 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
24 #include <../src/ksp/pc/impls/spai/petscspai.h>
25 
26 /*
27     These are the SPAI include files
28 */
29 EXTERN_C_BEGIN
30 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
31 #include <spai.h>
32 #include <matrix.h>
33 EXTERN_C_END
34 
35 static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
36 static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
37 
38 typedef struct {
39   matrix *B;  /* matrix in SPAI format */
40   matrix *BT; /* transpose of matrix in SPAI format */
41   matrix *M;  /* the approximate inverse in SPAI format */
42 
43   Mat PM; /* the approximate inverse PETSc format */
44 
45   double epsilon;    /* tolerance */
46   int    nbsteps;    /* max number of "improvement" steps per line */
47   int    max;        /* max dimensions of is_I, q, etc. */
48   int    maxnew;     /* max number of new entries per step */
49   int    block_size; /* constant block size */
50   int    cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
51   int    verbose;    /* SPAI prints timing and statistics */
52 
53   int      sp;        /* symmetric nonzero pattern */
54   MPI_Comm comm_spai; /* communicator to be used with spai */
55 } PC_SPAI;
56 
57 static PetscErrorCode PCSetUp_SPAI(PC pc)
58 {
59   PC_SPAI *ispai = (PC_SPAI *)pc->data;
60   Mat      AT;
61 
62   PetscFunctionBegin;
63   init_SPAI();
64 
65   if (ispai->sp) {
66     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
67   } else {
68     /* Use the transpose to get the column nonzero structure. */
69     PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
70     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
71     PetscCall(MatDestroy(&AT));
72   }
73 
74   /* Destroy the transpose */
75   /* Don't know how to do it. PETSc developers? */
76 
77   /* construct SPAI preconditioner */
78   /* FILE *messages */    /* file for warning messages */
79   /* double epsilon */    /* tolerance */
80   /* int nbsteps */       /* max number of "improvement" steps per line */
81   /* int max */           /* max dimensions of is_I, q, etc. */
82   /* int maxnew */        /* max number of new entries per step */
83   /* int block_size */    /* block_size == 1 specifies scalar elements
84                               block_size == n specifies nxn constant-block elements
85                               block_size == 0 specifies variable-block elements */
86   /* int cache_size */    /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
87   /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
88                               verbose == 1 prints timing and matrix statistics */
89 
90   PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);
91 
92   PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));
93 
94   /* free the SPAI matrices */
95   sp_free_matrix(ispai->B);
96   sp_free_matrix(ispai->M);
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
101 {
102   PC_SPAI *ispai = (PC_SPAI *)pc->data;
103 
104   PetscFunctionBegin;
105   /* Now using PETSc's multiply */
106   PetscCall(MatMult(ispai->PM, xx, y));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
111 {
112   PC_SPAI *ispai = (PC_SPAI *)pc->data;
113 
114   PetscFunctionBegin;
115   /* Now using PETSc's multiply */
116   PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 static PetscErrorCode PCDestroy_SPAI(PC pc)
121 {
122   PC_SPAI *ispai = (PC_SPAI *)pc->data;
123 
124   PetscFunctionBegin;
125   PetscCall(MatDestroy(&ispai->PM));
126   PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
127   PetscCall(PetscFree(pc->data));
128   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
129   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
130   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
131   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
132   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
133   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
134   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
135   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
136   PetscFunctionReturn(PETSC_SUCCESS);
137 }
138 
139 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
140 {
141   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
142   PetscBool iascii;
143 
144   PetscFunctionBegin;
145   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
146   if (iascii) {
147     PetscCall(PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon));
148     PetscCall(PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps));
149     PetscCall(PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max));
150     PetscCall(PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew));
151     PetscCall(PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size));
152     PetscCall(PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size));
153     PetscCall(PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose));
154     PetscCall(PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp));
155   }
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
160 {
161   PC_SPAI *ispai = (PC_SPAI *)pc->data;
162 
163   PetscFunctionBegin;
164   ispai->epsilon = (double)epsilon1;
165   PetscFunctionReturn(PETSC_SUCCESS);
166 }
167 
168 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
169 {
170   PC_SPAI *ispai = (PC_SPAI *)pc->data;
171 
172   PetscFunctionBegin;
173   ispai->nbsteps = (int)nbsteps1;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 /* added 1/7/99 g.h. */
178 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
179 {
180   PC_SPAI *ispai = (PC_SPAI *)pc->data;
181 
182   PetscFunctionBegin;
183   ispai->max = (int)max1;
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
188 {
189   PC_SPAI *ispai = (PC_SPAI *)pc->data;
190 
191   PetscFunctionBegin;
192   ispai->maxnew = (int)maxnew1;
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
196 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
197 {
198   PC_SPAI *ispai = (PC_SPAI *)pc->data;
199 
200   PetscFunctionBegin;
201   ispai->block_size = (int)block_size1;
202   PetscFunctionReturn(PETSC_SUCCESS);
203 }
204 
205 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
206 {
207   PC_SPAI *ispai = (PC_SPAI *)pc->data;
208 
209   PetscFunctionBegin;
210   ispai->cache_size = (int)cache_size;
211   PetscFunctionReturn(PETSC_SUCCESS);
212 }
213 
214 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
215 {
216   PC_SPAI *ispai = (PC_SPAI *)pc->data;
217 
218   PetscFunctionBegin;
219   ispai->verbose = (int)verbose;
220   PetscFunctionReturn(PETSC_SUCCESS);
221 }
222 
223 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
224 {
225   PC_SPAI *ispai = (PC_SPAI *)pc->data;
226 
227   PetscFunctionBegin;
228   ispai->sp = (int)sp;
229   PetscFunctionReturn(PETSC_SUCCESS);
230 }
231 
232 /*@
233   PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner
234 
235   Input Parameters:
236 + pc       - the preconditioner
237 - epsilon1 - epsilon (default .4)
238 
239   Note:
240   Espilon must be between 0 and 1. It controls the
241   quality of the approximation of M to the inverse of
242   A. Higher values of epsilon lead to more work, more
243   fill, and usually better preconditioners. In many
244   cases the best choice of epsilon is the one that
245   divides the total solution time equally between the
246   preconditioner and the solver.
247 
248   Level: intermediate
249 
250 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
251   @*/
252 PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
253 {
254   PetscFunctionBegin;
255   PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 
259 /*@
260   PCSPAISetNBSteps - set maximum number of improvement steps per row in
261   the `PCSPAI` preconditioner
262 
263   Input Parameters:
264 + pc       - the preconditioner
265 - nbsteps1 - number of steps (default 5)
266 
267   Note:
268   `PCSPAI` constructs to approximation to every column of
269   the exact inverse of A in a series of improvement
270   steps. The quality of the approximation is determined
271   by epsilon. If an approximation achieving an accuracy
272   of epsilon is not obtained after ns steps, SPAI simply
273   uses the best approximation constructed so far.
274 
275   Level: intermediate
276 
277 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
278 @*/
279 PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
280 {
281   PetscFunctionBegin;
282   PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
283   PetscFunctionReturn(PETSC_SUCCESS);
284 }
285 
286 /* added 1/7/99 g.h. */
287 /*@
288   PCSPAISetMax - set the size of various working buffers in
289   the `PCSPAI` preconditioner
290 
291   Input Parameters:
292 + pc   - the preconditioner
293 - max1 - size (default is 5000)
294 
295   Level: intermediate
296 
297 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
298 @*/
299 PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
300 {
301   PetscFunctionBegin;
302   PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
303   PetscFunctionReturn(PETSC_SUCCESS);
304 }
305 
306 /*@
307   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
308   in `PCSPAI` preconditioner
309 
310   Input Parameters:
311 + pc      - the preconditioner
312 - maxnew1 - maximum number (default 5)
313 
314   Level: intermediate
315 
316 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
317 @*/
318 PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
319 {
320   PetscFunctionBegin;
321   PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 /*@
326   PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner
327 
328   Input Parameters:
329 + pc          - the preconditioner
330 - block_size1 - block size (default 1)
331 
332   Notes:
333   A block
334   size of 1 treats A as a matrix of scalar elements. A
335   block size of s > 1 treats A as a matrix of sxs
336   blocks. A block size of 0 treats A as a matrix with
337   variable sized blocks, which are determined by
338   searching for dense square diagonal blocks in A.
339   This can be very effective for finite-element
340   matrices.
341 
342   SPAI will convert A to block form, use a block
343   version of the preconditioner algorithm, and then
344   convert the result back to scalar form.
345 
346   In many cases the a block-size parameter other than 1
347   can lead to very significant improvement in
348   performance.
349 
350   Level: intermediate
351 
352 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
353 @*/
354 PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
355 {
356   PetscFunctionBegin;
357   PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
361 /*@
362   PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner
363 
364   Input Parameters:
365 + pc         - the preconditioner
366 - cache_size - cache size {0,1,2,3,4,5} (default 5)
367 
368   Note:
369   `PCSPAI` uses a hash table to cache messages and avoid
370   redundant communication. If suggest always using
371   5. This parameter is irrelevant in the serial
372   version.
373 
374   Level: intermediate
375 
376 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
377 @*/
378 PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
379 {
380   PetscFunctionBegin;
381   PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
382   PetscFunctionReturn(PETSC_SUCCESS);
383 }
384 
385 /*@
386   PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner
387 
388   Input Parameters:
389 + pc      - the preconditioner
390 - verbose - level (default 1)
391 
392   Note:
393   print parameters, timings and matrix statistics
394 
395   Level: intermediate
396 
397 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
398 @*/
399 PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
400 {
401   PetscFunctionBegin;
402   PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 /*@
407   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner
408 
409   Input Parameters:
410 + pc - the preconditioner
411 - sp - 0 or 1
412 
413   Note:
414   If A has a symmetric nonzero pattern use -sp 1 to
415   improve performance by eliminating some communication
416   in the parallel version. Even if A does not have a
417   symmetric nonzero pattern -sp 1 may well lead to good
418   results, but the code will not follow the published
419   SPAI algorithm exactly.
420 
421   Level: intermediate
422 
423 .seealso: [](ch_ksp), `PCSPAI`, `PCSetType()`
424 @*/
425 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
426 {
427   PetscFunctionBegin;
428   PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
429   PetscFunctionReturn(PETSC_SUCCESS);
430 }
431 
432 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
433 {
434   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
435   int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
436   double    epsilon1;
437   PetscBool flg;
438 
439   PetscFunctionBegin;
440   PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
441   PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
442   if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
443   PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
444   if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
445   /* added 1/7/99 g.h. */
446   PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
447   if (flg) PetscCall(PCSPAISetMax(pc, max1));
448   PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
449   if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
450   PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
451   if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
452   PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
453   if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
454   PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
455   if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
456   PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
457   if (flg) PetscCall(PCSPAISetSp(pc, sp));
458   PetscOptionsHeadEnd();
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 /*MC
463    PCSPAI - Use the Sparse Approximate Inverse method
464 
465    Options Database Keys:
466 +  -pc_spai_epsilon <eps> - set tolerance
467 .  -pc_spai_nbstep <n> - set nbsteps
468 .  -pc_spai_max <m> - set max
469 .  -pc_spai_max_new <m> - set maxnew
470 .  -pc_spai_block_size <n> - set block size
471 .  -pc_spai_cache_size <n> - set cache size
472 .  -pc_spai_sp <m> - set sp
473 -  -pc_spai_set_verbose <true,false> - verbose output
474 
475    Level: beginner
476 
477    Note:
478     This only works with `MATAIJ` matrices.
479 
480    References:
481  . * -  Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)
482 
483 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
484           `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
485           `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
486 M*/
487 
488 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
489 {
490   PC_SPAI *ispai;
491 
492   PetscFunctionBegin;
493   PetscCall(PetscNew(&ispai));
494   pc->data = ispai;
495 
496   pc->ops->destroy         = PCDestroy_SPAI;
497   pc->ops->apply           = PCApply_SPAI;
498   pc->ops->matapply        = PCMatApply_SPAI;
499   pc->ops->applyrichardson = 0;
500   pc->ops->setup           = PCSetUp_SPAI;
501   pc->ops->view            = PCView_SPAI;
502   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
503 
504   ispai->epsilon    = .4;
505   ispai->nbsteps    = 5;
506   ispai->max        = 5000;
507   ispai->maxnew     = 5;
508   ispai->block_size = 1;
509   ispai->cache_size = 5;
510   ispai->verbose    = 0;
511 
512   ispai->sp = 1;
513   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));
514 
515   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
516   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
517   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
518   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
519   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
520   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
521   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
522   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
523   PetscFunctionReturn(PETSC_SUCCESS);
524 }
525 
526 /*
527    Converts from a PETSc matrix to an SPAI matrix
528 */
529 static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
530 {
531   matrix                  *M;
532   int                      i, j, col;
533   int                      row_indx;
534   int                      len, pe, local_indx, start_indx;
535   int                     *mapping;
536   const int               *cols;
537   const double            *vals;
538   int                      n, mnl, nnl, nz, rstart, rend;
539   PetscMPIInt              size, rank;
540   struct compressed_lines *rows;
541 
542   PetscFunctionBegin;
543   PetscCallMPI(MPI_Comm_size(comm, &size));
544   PetscCallMPI(MPI_Comm_rank(comm, &rank));
545   PetscCall(MatGetSize(A, &n, &n));
546   PetscCall(MatGetLocalSize(A, &mnl, &nnl));
547 
548   /*
549     not sure why a barrier is required. commenting out
550   PetscCallMPI(MPI_Barrier(comm));
551   */
552 
553   M = new_matrix((SPAI_Comm)comm);
554 
555   M->n              = n;
556   M->bs             = 1;
557   M->max_block_size = 1;
558 
559   M->mnls          = (int *)malloc(sizeof(int) * size);
560   M->start_indices = (int *)malloc(sizeof(int) * size);
561   M->pe            = (int *)malloc(sizeof(int) * n);
562   M->block_sizes   = (int *)malloc(sizeof(int) * n);
563   for (i = 0; i < n; i++) M->block_sizes[i] = 1;
564 
565   PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));
566 
567   M->start_indices[0] = 0;
568   for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];
569 
570   M->mnl            = M->mnls[M->myid];
571   M->my_start_index = M->start_indices[M->myid];
572 
573   for (i = 0; i < size; i++) {
574     start_indx = M->start_indices[i];
575     for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
576   }
577 
578   if (AT) {
579     M->lines = new_compressed_lines(M->mnls[rank], 1);
580   } else {
581     M->lines = new_compressed_lines(M->mnls[rank], 0);
582   }
583 
584   rows = M->lines;
585 
586   /* Determine the mapping from global indices to pointers */
587   PetscCall(PetscMalloc1(M->n, &mapping));
588   pe         = 0;
589   local_indx = 0;
590   for (i = 0; i < M->n; i++) {
591     if (local_indx >= M->mnls[pe]) {
592       pe++;
593       local_indx = 0;
594     }
595     mapping[i] = local_indx + M->start_indices[pe];
596     local_indx++;
597   }
598 
599   /************** Set up the row structure *****************/
600 
601   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
602   for (i = rstart; i < rend; i++) {
603     row_indx = i - rstart;
604     PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
605     /* allocate buffers */
606     rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
607     rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
608     /* copy the matrix */
609     for (j = 0; j < nz; j++) {
610       col = cols[j];
611       len = rows->len[row_indx]++;
612 
613       rows->ptrs[row_indx][len] = mapping[col];
614       rows->A[row_indx][len]    = vals[j];
615     }
616     rows->slen[row_indx] = rows->len[row_indx];
617 
618     PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
619   }
620 
621   /************** Set up the column structure *****************/
622 
623   if (AT) {
624     for (i = rstart; i < rend; i++) {
625       row_indx = i - rstart;
626       PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
627       /* allocate buffers */
628       rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
629       /* copy the matrix (i.e., the structure) */
630       for (j = 0; j < nz; j++) {
631         col = cols[j];
632         len = rows->rlen[row_indx]++;
633 
634         rows->rptrs[row_indx][len] = mapping[col];
635       }
636       PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
637     }
638   }
639 
640   PetscCall(PetscFree(mapping));
641 
642   order_pointers(M);
643   M->maxnz = calc_maxnz(M);
644   *B       = M;
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 /*
649    Converts from an SPAI matrix B  to a PETSc matrix PB.
650    This assumes that the SPAI matrix B is stored in
651    COMPRESSED-ROW format.
652 */
653 static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
654 {
655   PetscMPIInt size, rank;
656   int         m, n, M, N;
657   int         d_nz, o_nz;
658   int        *d_nnz, *o_nnz;
659   int         i, k, global_row, global_col, first_diag_col, last_diag_col;
660   PetscScalar val;
661 
662   PetscFunctionBegin;
663   PetscCallMPI(MPI_Comm_size(comm, &size));
664   PetscCallMPI(MPI_Comm_rank(comm, &rank));
665 
666   m = n = B->mnls[rank];
667   d_nz = o_nz = 0;
668 
669   /* Determine preallocation for MatCreateAIJ */
670   PetscCall(PetscMalloc1(m, &d_nnz));
671   PetscCall(PetscMalloc1(m, &o_nnz));
672   for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
673   first_diag_col = B->start_indices[rank];
674   last_diag_col  = first_diag_col + B->mnls[rank];
675   for (i = 0; i < B->mnls[rank]; i++) {
676     for (k = 0; k < B->lines->len[i]; k++) {
677       global_col = B->lines->ptrs[i][k];
678       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
679       else o_nnz[i]++;
680     }
681   }
682 
683   M = N = B->n;
684   /* Here we only know how to create AIJ format */
685   PetscCall(MatCreate(comm, PB));
686   PetscCall(MatSetSizes(*PB, m, n, M, N));
687   PetscCall(MatSetType(*PB, MATAIJ));
688   PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
689   PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));
690 
691   for (i = 0; i < B->mnls[rank]; i++) {
692     global_row = B->start_indices[rank] + i;
693     for (k = 0; k < B->lines->len[i]; k++) {
694       global_col = B->lines->ptrs[i][k];
695 
696       val = B->lines->A[i][k];
697       PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
698     }
699   }
700 
701   PetscCall(PetscFree(d_nnz));
702   PetscCall(PetscFree(o_nnz));
703 
704   PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
705   PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
706   PetscFunctionReturn(PETSC_SUCCESS);
707 }
708