xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 
22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
23 #include <../src/ksp/pc/impls/spai/petscspai.h>
24 
25 /*
26     These are the SPAI include files
27 */
28 EXTERN_C_BEGIN
29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30 #include <spai.h>
31 #include <matrix.h>
32 EXTERN_C_END
33 
34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
37 extern PetscErrorCode MM_to_PETSC(char *, char *, char *);
38 
/* Private context for the SPAI preconditioner; defaults are set in PCCreate_SPAI() */
typedef struct {
  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format (not assigned anywhere in this file) */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM; /* the approximate inverse in PETSc format; built in PCSetUp_SPAI() */

  double epsilon;    /* tolerance; controls the quality of the approximate inverse */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai (duplicated in PCCreate_SPAI()) */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
60 static PetscErrorCode PCSetUp_SPAI(PC pc) {
61   PC_SPAI *ispai = (PC_SPAI *)pc->data;
62   Mat      AT;
63 
64   PetscFunctionBegin;
65   init_SPAI();
66 
67   if (ispai->sp) {
68     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
69   } else {
70     /* Use the transpose to get the column nonzero structure. */
71     PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
72     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
73     PetscCall(MatDestroy(&AT));
74   }
75 
76   /* Destroy the transpose */
77   /* Don't know how to do it. PETSc developers? */
78 
79   /* construct SPAI preconditioner */
80   /* FILE *messages */    /* file for warning messages */
81   /* double epsilon */    /* tolerance */
82   /* int nbsteps */       /* max number of "improvement" steps per line */
83   /* int max */           /* max dimensions of is_I, q, etc. */
84   /* int maxnew */        /* max number of new entries per step */
85   /* int block_size */    /* block_size == 1 specifies scalar elments
86                               block_size == n specifies nxn constant-block elements
87                               block_size == 0 specifies variable-block elements */
88   /* int cache_size */    /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
89   /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
90                               verbose == 1 prints timing and matrix statistics */
91 
92   PetscCall(bspai(ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose));
93 
94   PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));
95 
96   /* free the SPAI matrices */
97   sp_free_matrix(ispai->B);
98   sp_free_matrix(ispai->M);
99   PetscFunctionReturn(0);
100 }
101 
102 /**********************************************************************/
103 
104 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) {
105   PC_SPAI *ispai = (PC_SPAI *)pc->data;
106 
107   PetscFunctionBegin;
108   /* Now using PETSc's multiply */
109   PetscCall(MatMult(ispai->PM, xx, y));
110   PetscFunctionReturn(0);
111 }
112 
113 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) {
114   PC_SPAI *ispai = (PC_SPAI *)pc->data;
115 
116   PetscFunctionBegin;
117   /* Now using PETSc's multiply */
118   PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
119   PetscFunctionReturn(0);
120 }
121 
122 /**********************************************************************/
123 
124 static PetscErrorCode PCDestroy_SPAI(PC pc) {
125   PC_SPAI *ispai = (PC_SPAI *)pc->data;
126 
127   PetscFunctionBegin;
128   PetscCall(MatDestroy(&ispai->PM));
129   PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
130   PetscCall(PetscFree(pc->data));
131   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
132   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
133   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
134   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
135   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
136   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
137   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
138   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
139   PetscFunctionReturn(0);
140 }
141 
142 /**********************************************************************/
143 
144 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) {
145   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
146   PetscBool iascii;
147 
148   PetscFunctionBegin;
149   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
150   if (iascii) {
151     PetscCall(PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon));
152     PetscCall(PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps));
153     PetscCall(PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max));
154     PetscCall(PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew));
155     PetscCall(PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size));
156     PetscCall(PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size));
157     PetscCall(PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose));
158     PetscCall(PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp));
159   }
160   PetscFunctionReturn(0);
161 }
162 
163 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) {
164   PC_SPAI *ispai = (PC_SPAI *)pc->data;
165 
166   PetscFunctionBegin;
167   ispai->epsilon = (double)epsilon1;
168   PetscFunctionReturn(0);
169 }
170 
171 /**********************************************************************/
172 
173 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) {
174   PC_SPAI *ispai = (PC_SPAI *)pc->data;
175 
176   PetscFunctionBegin;
177   ispai->nbsteps = (int)nbsteps1;
178   PetscFunctionReturn(0);
179 }
180 
181 /**********************************************************************/
182 
183 /* added 1/7/99 g.h. */
184 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) {
185   PC_SPAI *ispai = (PC_SPAI *)pc->data;
186 
187   PetscFunctionBegin;
188   ispai->max = (int)max1;
189   PetscFunctionReturn(0);
190 }
191 
192 /**********************************************************************/
193 
194 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) {
195   PC_SPAI *ispai = (PC_SPAI *)pc->data;
196 
197   PetscFunctionBegin;
198   ispai->maxnew = (int)maxnew1;
199   PetscFunctionReturn(0);
200 }
201 
202 /**********************************************************************/
203 
204 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) {
205   PC_SPAI *ispai = (PC_SPAI *)pc->data;
206 
207   PetscFunctionBegin;
208   ispai->block_size = (int)block_size1;
209   PetscFunctionReturn(0);
210 }
211 
212 /**********************************************************************/
213 
214 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) {
215   PC_SPAI *ispai = (PC_SPAI *)pc->data;
216 
217   PetscFunctionBegin;
218   ispai->cache_size = (int)cache_size;
219   PetscFunctionReturn(0);
220 }
221 
222 /**********************************************************************/
223 
224 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) {
225   PC_SPAI *ispai = (PC_SPAI *)pc->data;
226 
227   PetscFunctionBegin;
228   ispai->verbose = (int)verbose;
229   PetscFunctionReturn(0);
230 }
231 
232 /**********************************************************************/
233 
234 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) {
235   PC_SPAI *ispai = (PC_SPAI *)pc->data;
236 
237   PetscFunctionBegin;
238   ispai->sp = (int)sp;
239   PetscFunctionReturn(0);
240 }
241 
242 /* -------------------------------------------------------------------*/
243 
244 /*@
245   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
246 
247   Input Parameters:
248 + pc - the preconditioner
249 - eps - epsilon (default .4)
250 
251   Notes:
252     Epsilon must be between 0 and 1. It controls the
253                  quality of the approximation of M to the inverse of
254                  A. Higher values of epsilon lead to more work, more
255                  fill, and usually better preconditioners. In many
256                  cases the best choice of epsilon is the one that
257                  divides the total solution time equally between the
258                  preconditioner and the solver.
259 
260   Level: intermediate
261 
262 .seealso: `PCSPAI`, `PCSetType()`
263   @*/
PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
  PetscFunctionReturn(0);
}
269 
270 /**********************************************************************/
271 
272 /*@
273   PCSPAISetNBSteps - set maximum number of improvement steps per row in
274         the SPAI preconditioner
275 
276   Input Parameters:
277 + pc - the preconditioner
278 - n - number of steps (default 5)
279 
280   Notes:
281     SPAI constructs an approximation to every column of
282                  the exact inverse of A in a series of improvement
283                  steps. The quality of the approximation is determined
284                  by epsilon. If an approximation achieving an accuracy
285                  of epsilon is not obtained after ns steps, SPAI simply
286                  uses the best approximation constructed so far.
287 
288   Level: intermediate
289 
290 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
291 @*/
PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
  PetscFunctionReturn(0);
}
297 
298 /**********************************************************************/
299 
300 /* added 1/7/99 g.h. */
301 /*@
302   PCSPAISetMax - set the size of various working buffers in
303         the SPAI preconditioner
304 
305   Input Parameters:
306 + pc - the preconditioner
307 - n - size (default is 5000)
308 
309   Level: intermediate
310 
311 .seealso: `PCSPAI`, `PCSetType()`
312 @*/
PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
  PetscFunctionReturn(0);
}
318 
319 /**********************************************************************/
320 
321 /*@
322   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
323    in SPAI preconditioner
324 
325   Input Parameters:
326 + pc - the preconditioner
327 - n - maximum number (default 5)
328 
329   Level: intermediate
330 
331 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
332 @*/
PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
  PetscFunctionReturn(0);
}
338 
339 /**********************************************************************/
340 
341 /*@
342   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
343 
344   Input Parameters:
345 + pc - the preconditioner
346 - n - block size (default 1)
347 
348   Notes:
349     A block
350                  size of 1 treats A as a matrix of scalar elements. A
351                  block size of s > 1 treats A as a matrix of sxs
352                  blocks. A block size of 0 treats A as a matrix with
353                  variable sized blocks, which are determined by
354                  searching for dense square diagonal blocks in A.
355                  This can be very effective for finite-element
356                  matrices.
357 
358                  SPAI will convert A to block form, use a block
359                  version of the preconditioner algorithm, and then
360                  convert the result back to scalar form.
361 
362                  In many cases a block-size parameter other than 1
363                  can lead to very significant improvement in
364                  performance.
365 
366   Level: intermediate
367 
368 .seealso: `PCSPAI`, `PCSetType()`
369 @*/
PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
  PetscFunctionReturn(0);
}
375 
376 /**********************************************************************/
377 
378 /*@
379   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
380 
381   Input Parameters:
382 + pc - the preconditioner
383 - n -  cache size {0,1,2,3,4,5} (default 5)
384 
385   Notes:
386     SPAI uses a hash table to cache messages and avoid
387                  redundant communication. We suggest always using
388                  5. This parameter is irrelevant in the serial
389                  version.
390 
391   Level: intermediate
392 
393 .seealso: `PCSPAI`, `PCSetType()`
394 @*/
PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
  PetscFunctionReturn(0);
}
400 
401 /**********************************************************************/
402 
403 /*@
404   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
405 
406   Input Parameters:
407 + pc - the preconditioner
408 - n - level (default 1)
409 
410   Notes:
411     print parameters, timings and matrix statistics
412 
413   Level: intermediate
414 
415 .seealso: `PCSPAI`, `PCSetType()`
416 @*/
PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
  PetscFunctionReturn(0);
}
422 
423 /**********************************************************************/
424 
425 /*@
426   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
427 
428   Input Parameters:
429 + pc - the preconditioner
430 - n - 0 or 1
431 
432   Notes:
433     If A has a symmetric nonzero pattern use -sp 1 to
434                  improve performance by eliminating some communication
435                  in the parallel version. Even if A does not have a
436                  symmetric nonzero pattern -sp 1 may well lead to good
437                  results, but the code will not follow the published
438                  SPAI algorithm exactly.
439 
440   Level: intermediate
441 
442 .seealso: `PCSPAI`, `PCSetType()`
443 @*/
PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) {
  PetscFunctionBegin;
  /* dispatch to the implementation composed on the PC; no-op for other PC types */
  PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
  PetscFunctionReturn(0);
}
449 
450 /**********************************************************************/
451 
452 /**********************************************************************/
453 
454 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject) {
455   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
456   int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
457   double    epsilon1;
458   PetscBool flg;
459 
460   PetscFunctionBegin;
461   PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
462   PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
463   if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
464   PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
465   if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
466   /* added 1/7/99 g.h. */
467   PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
468   if (flg) PetscCall(PCSPAISetMax(pc, max1));
469   PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
470   if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
471   PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
472   if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
473   PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
474   if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
475   PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
476   if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
477   PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
478   if (flg) PetscCall(PCSPAISetSp(pc, sp));
479   PetscOptionsHeadEnd();
480   PetscFunctionReturn(0);
481 }
482 
483 /**********************************************************************/
484 
485 /*MC
486    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
487      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
488 
489    Options Database Keys:
490 +  -pc_spai_epsilon <eps> - set tolerance
491 .  -pc_spai_nbsteps <n> - set nbsteps
492 .  -pc_spai_max <m> - set max
493 .  -pc_spai_maxnew <m> - set maxnew
494 .  -pc_spai_block_size <n> - set block size
495 .  -pc_spai_cache_size <n> - set cache size
496 .  -pc_spai_sp <m> - set sp
497 -  -pc_spai_verbose <n> - set verbosity level
498 
499    Notes:
500     This only works with AIJ matrices.
501 
502    Level: beginner
503 
504 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
505           `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
506           `PCSPAISetVerbose()`, `PCSPAISetSp()`
507 M*/
508 
509 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) {
510   PC_SPAI *ispai;
511 
512   PetscFunctionBegin;
513   PetscCall(PetscNewLog(pc, &ispai));
514   pc->data = ispai;
515 
516   pc->ops->destroy         = PCDestroy_SPAI;
517   pc->ops->apply           = PCApply_SPAI;
518   pc->ops->matapply        = PCMatApply_SPAI;
519   pc->ops->applyrichardson = 0;
520   pc->ops->setup           = PCSetUp_SPAI;
521   pc->ops->view            = PCView_SPAI;
522   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
523 
524   ispai->epsilon    = .4;
525   ispai->nbsteps    = 5;
526   ispai->max        = 5000;
527   ispai->maxnew     = 5;
528   ispai->block_size = 1;
529   ispai->cache_size = 5;
530   ispai->verbose    = 0;
531 
532   ispai->sp = 1;
533   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));
534 
535   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
536   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
537   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
538   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
539   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
540   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
541   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
542   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
543   PetscFunctionReturn(0);
544 }
545 
546 /**********************************************************************/
547 
548 /*
549    Converts from a PETSc matrix to an SPAI matrix
550 */
PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B) {
  matrix                  *M;
  int                      i, j, col;
  int                      row_indx;
  int                      len, pe, local_indx, start_indx;
  int                     *mapping; /* global index -> SPAI global pointer index */
  const int               *cols;
  const double            *vals;
  /* NOTE(review): int is used interchangeably with PetscInt/PetscScalar for the
     MatGetSize/MatGetRow/MatGetOwnershipRange arguments below, which assumes a
     32-bit-index real-scalar PETSc build -- confirm for other configurations */
  int                      n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt              size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));           /* global size; A is assumed square */
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));  /* local row/column counts */

  /*
    not sure why a barrier is required. commenting out
  PetscCallMPI(MPI_Barrier(comm));
  */

  /* allocate the SPAI matrix shell; scalar (1x1) blocks throughout */
  M = new_matrix((SPAI_Comm)comm);

  M->n              = n;
  M->bs             = 1;
  M->max_block_size = 1;

  /* per-process sizes/offsets and per-row owner/block-size tables;
     allocated with malloc because SPAI frees them with free() in sp_free_matrix() */
  M->mnls          = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe            = (int *)malloc(sizeof(int) * n);
  M->block_sizes   = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  /* every process learns every process's local row count */
  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  /* prefix sums of the local sizes give each process's first global row */
  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* record which process owns each global row */
  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  /* column (transpose) structure is only built when AT was supplied */
  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe         = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /*********************************************************/
  /************** Set up the row structure *****************/
  /*********************************************************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart; /* local row index */
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers (freed later by SPAI's sp_free_matrix(), hence malloc) */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************************************************************/
  /************** Set up the column structure *****************/
  /*********************************************************/

  if (AT) {
    /* rows of AT give the column nonzero pattern of A; only the structure
       (indices), not the values, is recorded */
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  /* SPAI post-processing: sort the pointer lists and record the max row length */
  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(0);
}
672 
673 /**********************************************************************/
674 
675 /*
676    Converts from an SPAI matrix B  to a PETSc matrix PB.
677    This assumes that the SPAI matrix B is stored in
678    COMPRESSED-ROW format.
679 */
680 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB) {
681   PetscMPIInt size, rank;
682   int         m, n, M, N;
683   int         d_nz, o_nz;
684   int        *d_nnz, *o_nnz;
685   int         i, k, global_row, global_col, first_diag_col, last_diag_col;
686   PetscScalar val;
687 
688   PetscFunctionBegin;
689   PetscCallMPI(MPI_Comm_size(comm, &size));
690   PetscCallMPI(MPI_Comm_rank(comm, &rank));
691 
692   m = n = B->mnls[rank];
693   d_nz = o_nz = 0;
694 
695   /* Determine preallocation for MatCreateAIJ */
696   PetscCall(PetscMalloc1(m, &d_nnz));
697   PetscCall(PetscMalloc1(m, &o_nnz));
698   for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
699   first_diag_col = B->start_indices[rank];
700   last_diag_col  = first_diag_col + B->mnls[rank];
701   for (i = 0; i < B->mnls[rank]; i++) {
702     for (k = 0; k < B->lines->len[i]; k++) {
703       global_col = B->lines->ptrs[i][k];
704       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
705       else o_nnz[i]++;
706     }
707   }
708 
709   M = N = B->n;
710   /* Here we only know how to create AIJ format */
711   PetscCall(MatCreate(comm, PB));
712   PetscCall(MatSetSizes(*PB, m, n, M, N));
713   PetscCall(MatSetType(*PB, MATAIJ));
714   PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
715   PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));
716 
717   for (i = 0; i < B->mnls[rank]; i++) {
718     global_row = B->start_indices[rank] + i;
719     for (k = 0; k < B->lines->len[i]; k++) {
720       global_col = B->lines->ptrs[i][k];
721 
722       val = B->lines->A[i][k];
723       PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
724     }
725   }
726 
727   PetscCall(PetscFree(d_nnz));
728   PetscCall(PetscFree(o_nnz));
729 
730   PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
731   PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
732   PetscFunctionReturn(0);
733 }
734 
735 /**********************************************************************/
736 
737 /*
738    Converts from an SPAI vector v  to a PETSc vec Pv.
739 */
740 PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv) {
741   PetscMPIInt size, rank;
742   int         m, M, i, *mnls, *start_indices, *global_indices;
743 
744   PetscFunctionBegin;
745   PetscCallMPI(MPI_Comm_size(comm, &size));
746   PetscCallMPI(MPI_Comm_rank(comm, &rank));
747 
748   m = v->mnl;
749   M = v->n;
750 
751   PetscCall(VecCreateMPI(comm, m, M, Pv));
752 
753   PetscCall(PetscMalloc1(size, &mnls));
754   PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm));
755 
756   PetscCall(PetscMalloc1(size, &start_indices));
757 
758   start_indices[0] = 0;
759   for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1];
760 
761   PetscCall(PetscMalloc1(v->mnl, &global_indices));
762   for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i;
763 
764   PetscCall(PetscFree(mnls));
765   PetscCall(PetscFree(start_indices));
766 
767   PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES));
768   PetscCall(VecAssemblyBegin(*Pv));
769   PetscCall(VecAssemblyEnd(*Pv));
770 
771   PetscCall(PetscFree(global_indices));
772   PetscFunctionReturn(0);
773 }
774