xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
      Note: there is some BAD memory leaking below!

      This code needs work

   1) get rid of all memory leaks
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 
22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
23 #include <../src/ksp/pc/impls/spai/petscspai.h>
24 
25 /*
26     These are the SPAI include files
27 */
28 EXTERN_C_BEGIN
29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30 #include <spai.h>
31 #include <matrix.h>
32 EXTERN_C_END
33 
34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
37 extern PetscErrorCode MM_to_PETSC(char *, char *, char *);
38 
/* Private context for the SPAI preconditioner; most fields mirror the
   parameters of SPAI's bspai() routine (see PCSetUp_SPAI) */
typedef struct {
  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM; /* the approximate inverse PETSc format */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;
57 
/*
   PCSetUp_SPAI - build the SPAI approximate inverse of pc->pmat.

   Converts the PETSc matrix into SPAI format, runs bspai() to compute the
   approximate inverse M, converts M back to a PETSc Mat (ispai->PM, applied
   later by PCApply_SPAI), and frees the intermediate SPAI-format matrices.
*/
static PetscErrorCode PCSetUp_SPAI(PC pc) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;
  Mat      AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix supplies its own column structure */
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
  } else {
    /* Use the transpose to get the column nonzero structure. */
    PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
    PetscCall(MatDestroy(&AT));
  }

  /* Destroy the transpose */
  /* Don't know how to do it. PETSc developers? */

  /* construct SPAI preconditioner */
  /* FILE *messages */    /* file for warning messages */
  /* double epsilon */    /* tolerance */
  /* int nbsteps */       /* max number of "improvement" steps per line */
  /* int max */           /* max dimensions of is_I, q, etc. */
  /* int maxnew */        /* max number of new entries per step */
  /* int block_size */    /* block_size == 1 specifies scalar elments
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */    /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
  /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  PetscCall(bspai(ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose));

  /* convert the SPAI-format inverse into the PETSc Mat used by PCApply_SPAI */
  PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(0);
}
99 
100 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y) {
101   PC_SPAI *ispai = (PC_SPAI *)pc->data;
102 
103   PetscFunctionBegin;
104   /* Now using PETSc's multiply */
105   PetscCall(MatMult(ispai->PM, xx, y));
106   PetscFunctionReturn(0);
107 }
108 
/*
   PCMatApply_SPAI - apply the preconditioner to a block of vectors: Y = M X.
*/
static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y) {
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  /* NOTE(review): MAT_REUSE_MATRIX assumes Y was set up as a product of PM and X;
     confirm the PCMatApply caller guarantees a compatible Y */
  PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
  PetscFunctionReturn(0);
}
117 
118 static PetscErrorCode PCDestroy_SPAI(PC pc) {
119   PC_SPAI *ispai = (PC_SPAI *)pc->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatDestroy(&ispai->PM));
123   PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
124   PetscCall(PetscFree(pc->data));
125   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
126   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
127   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
128   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
129   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
130   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
131   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
132   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
133   PetscFunctionReturn(0);
134 }
135 
136 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer) {
137   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
138   PetscBool iascii;
139 
140   PetscFunctionBegin;
141   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
142   if (iascii) {
143     PetscCall(PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon));
144     PetscCall(PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps));
145     PetscCall(PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max));
146     PetscCall(PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew));
147     PetscCall(PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size));
148     PetscCall(PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size));
149     PetscCall(PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose));
150     PetscCall(PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp));
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1) {
156   PC_SPAI *ispai = (PC_SPAI *)pc->data;
157 
158   PetscFunctionBegin;
159   ispai->epsilon = (double)epsilon1;
160   PetscFunctionReturn(0);
161 }
162 
163 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1) {
164   PC_SPAI *ispai = (PC_SPAI *)pc->data;
165 
166   PetscFunctionBegin;
167   ispai->nbsteps = (int)nbsteps1;
168   PetscFunctionReturn(0);
169 }
170 
171 /* added 1/7/99 g.h. */
172 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1) {
173   PC_SPAI *ispai = (PC_SPAI *)pc->data;
174 
175   PetscFunctionBegin;
176   ispai->max = (int)max1;
177   PetscFunctionReturn(0);
178 }
179 
180 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1) {
181   PC_SPAI *ispai = (PC_SPAI *)pc->data;
182 
183   PetscFunctionBegin;
184   ispai->maxnew = (int)maxnew1;
185   PetscFunctionReturn(0);
186 }
187 
188 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1) {
189   PC_SPAI *ispai = (PC_SPAI *)pc->data;
190 
191   PetscFunctionBegin;
192   ispai->block_size = (int)block_size1;
193   PetscFunctionReturn(0);
194 }
195 
196 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size) {
197   PC_SPAI *ispai = (PC_SPAI *)pc->data;
198 
199   PetscFunctionBegin;
200   ispai->cache_size = (int)cache_size;
201   PetscFunctionReturn(0);
202 }
203 
204 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose) {
205   PC_SPAI *ispai = (PC_SPAI *)pc->data;
206 
207   PetscFunctionBegin;
208   ispai->verbose = (int)verbose;
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp) {
213   PC_SPAI *ispai = (PC_SPAI *)pc->data;
214 
215   PetscFunctionBegin;
216   ispai->sp = (int)sp;
217   PetscFunctionReturn(0);
218 }
219 
/*@
  PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- epsilon1 - epsilon (default .4)

  Note:
  Epsilon must be between 0 and 1. It controls the quality of the
  approximation of M to the inverse of A. Higher values of epsilon lead
  to more work, more fill, and usually better preconditioners. In many
  cases the best choice of epsilon is the one that divides the total
  solution time equally between the preconditioner and the solver.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
  PetscFunctionReturn(0);
}
245 
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- nbsteps1 - number of steps (default 5)

  Note:
  `PCSPAI` constructs an approximation to every column of the exact
  inverse of A in a series of improvement steps. The quality of the
  approximation is determined by epsilon. If an approximation achieving
  an accuracy of epsilon is not obtained after nbsteps1 steps, SPAI simply
  uses the best approximation constructed so far.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
  PetscFunctionReturn(0);
}
271 
/* added 1/7/99 g.h. */
/*@
  PCSPAISetMax - set the size of various working buffers in
        the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- max1 - size (default is 5000)

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
  PetscFunctionReturn(0);
}
290 
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
  PetscFunctionReturn(0);
}
308 
/*@
  PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- block_size1 - block size (default 1)

  Notes:
  A block size of 1 treats A as a matrix of scalar elements. A block size
  of s > 1 treats A as a matrix of sxs blocks. A block size of 0 treats A
  as a matrix with variable sized blocks, which are determined by searching
  for dense square diagonal blocks in A. This can be very effective for
  finite-element matrices.

  SPAI will convert A to block form, use a block version of the
  preconditioner algorithm, and then convert the result back to scalar form.

  In many cases a block-size parameter other than 1 can lead to very
  significant improvement in performance.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
  PetscFunctionReturn(0);
}
343 
/*@
  PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- cache_size - cache size {0,1,2,3,4,5} (default 5)

  Note:
  `PCSPAI` uses a hash table to cache messages and avoid redundant
  communication. We suggest always using 5. This parameter is irrelevant
  in the serial version.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
  PetscFunctionReturn(0);
}
366 
/*@
  PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- verbose - level (default 1)

  Note:
  Prints parameters, timings and matrix statistics

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
  PetscFunctionReturn(0);
}
386 
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Note:
  If A has a symmetric nonzero pattern use -sp 1 to improve performance
  by eliminating some communication in the parallel version. Even if A
  does not have a symmetric nonzero pattern -sp 1 may well lead to good
  results, but the code will not follow the published SPAI algorithm
  exactly.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp) {
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
  PetscFunctionReturn(0);
}
411 
412 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject) {
413   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
414   int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
415   double    epsilon1;
416   PetscBool flg;
417 
418   PetscFunctionBegin;
419   PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
420   PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
421   if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
422   PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
423   if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
424   /* added 1/7/99 g.h. */
425   PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
426   if (flg) PetscCall(PCSPAISetMax(pc, max1));
427   PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
428   if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
429   PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
430   if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
431   PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
432   if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
433   PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
434   if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
435   PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
436   if (flg) PetscCall(PCSPAISetSp(pc, sp));
437   PetscOptionsHeadEnd();
438   PetscFunctionReturn(0);
439 }
440 
441 /*MC
442    PCSPAI - Use the Sparse Approximate Inverse method
443 
444    Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - set verbosity level
453 
454    Level: beginner
455 
456    Note:
457     This only works with `MATAIJ` matrices.
458 
459    References:
460  . * -  Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)
461 
462 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
463           `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
464           `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
465 M*/
466 
467 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) {
468   PC_SPAI *ispai;
469 
470   PetscFunctionBegin;
471   PetscCall(PetscNew(&ispai));
472   pc->data = ispai;
473 
474   pc->ops->destroy         = PCDestroy_SPAI;
475   pc->ops->apply           = PCApply_SPAI;
476   pc->ops->matapply        = PCMatApply_SPAI;
477   pc->ops->applyrichardson = 0;
478   pc->ops->setup           = PCSetUp_SPAI;
479   pc->ops->view            = PCView_SPAI;
480   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
481 
482   ispai->epsilon    = .4;
483   ispai->nbsteps    = 5;
484   ispai->max        = 5000;
485   ispai->maxnew     = 5;
486   ispai->block_size = 1;
487   ispai->cache_size = 5;
488   ispai->verbose    = 0;
489 
490   ispai->sp = 1;
491   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));
492 
493   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
494   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
495   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
496   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
497   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
498   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
499   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
500   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
501   PetscFunctionReturn(0);
502 }
503 
/*
   ConvertMatToMatrix - converts from a PETSc matrix A to an SPAI matrix *B.

   AT supplies the column nonzero structure that parallel SPAI needs; the
   caller passes A itself when the nonzero pattern is symmetric, or an
   explicit transpose otherwise (AT may be NULL to skip column structure).

   NOTE(review): all index data handed to SPAI is int, so this assumes
   PetscInt == int (no 64-bit indices) and PetscScalar == double - confirm
   against the configure constraints for the SPAI package.
*/
PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B) {
  matrix                  *M;
  int                      i, j, col;
  int                      row_indx;
  int                      len, pe, local_indx, start_indx;
  int                     *mapping;
  const int               *cols;
  const double            *vals;
  int                      n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt              size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));          /* square matrix: global rows == cols */
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required. commenting out
  PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  M->n              = n;
  M->bs             = 1; /* scalar (non-blocked) storage */
  M->max_block_size = 1;

  /* per-rank row counts, start indices, row owners, block sizes
     (malloc'd here; released later by sp_free_matrix) */
  M->mnls          = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe            = (int *)malloc(sizeof(int) * n);
  M->block_sizes   = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  /* gather every rank's local row count */
  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  /* prefix-sum the counts to get each rank's first global row */
  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* record the owning rank of every global row */
  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1); /* with column (transpose) structure */
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe         = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) { /* crossed into the next rank's range */
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }

  /************** Set up the row structure *****************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart; /* local row index */
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************** Set up the column structure *****************/

  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(0);
}
624 
625 /*
626    Converts from an SPAI matrix B  to a PETSc matrix PB.
627    This assumes that the SPAI matrix B is stored in
628    COMPRESSED-ROW format.
629 */
630 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB) {
631   PetscMPIInt size, rank;
632   int         m, n, M, N;
633   int         d_nz, o_nz;
634   int        *d_nnz, *o_nnz;
635   int         i, k, global_row, global_col, first_diag_col, last_diag_col;
636   PetscScalar val;
637 
638   PetscFunctionBegin;
639   PetscCallMPI(MPI_Comm_size(comm, &size));
640   PetscCallMPI(MPI_Comm_rank(comm, &rank));
641 
642   m = n = B->mnls[rank];
643   d_nz = o_nz = 0;
644 
645   /* Determine preallocation for MatCreateAIJ */
646   PetscCall(PetscMalloc1(m, &d_nnz));
647   PetscCall(PetscMalloc1(m, &o_nnz));
648   for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
649   first_diag_col = B->start_indices[rank];
650   last_diag_col  = first_diag_col + B->mnls[rank];
651   for (i = 0; i < B->mnls[rank]; i++) {
652     for (k = 0; k < B->lines->len[i]; k++) {
653       global_col = B->lines->ptrs[i][k];
654       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
655       else o_nnz[i]++;
656     }
657   }
658 
659   M = N = B->n;
660   /* Here we only know how to create AIJ format */
661   PetscCall(MatCreate(comm, PB));
662   PetscCall(MatSetSizes(*PB, m, n, M, N));
663   PetscCall(MatSetType(*PB, MATAIJ));
664   PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
665   PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));
666 
667   for (i = 0; i < B->mnls[rank]; i++) {
668     global_row = B->start_indices[rank] + i;
669     for (k = 0; k < B->lines->len[i]; k++) {
670       global_col = B->lines->ptrs[i][k];
671 
672       val = B->lines->A[i][k];
673       PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
674     }
675   }
676 
677   PetscCall(PetscFree(d_nnz));
678   PetscCall(PetscFree(o_nnz));
679 
680   PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
681   PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
682   PetscFunctionReturn(0);
683 }
684 
685 /*
686    Converts from an SPAI vector v  to a PETSc vec Pv.
687 */
688 PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv) {
689   PetscMPIInt size, rank;
690   int         m, M, i, *mnls, *start_indices, *global_indices;
691 
692   PetscFunctionBegin;
693   PetscCallMPI(MPI_Comm_size(comm, &size));
694   PetscCallMPI(MPI_Comm_rank(comm, &rank));
695 
696   m = v->mnl;
697   M = v->n;
698 
699   PetscCall(VecCreateMPI(comm, m, M, Pv));
700 
701   PetscCall(PetscMalloc1(size, &mnls));
702   PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm));
703 
704   PetscCall(PetscMalloc1(size, &start_indices));
705 
706   start_indices[0] = 0;
707   for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1];
708 
709   PetscCall(PetscMalloc1(v->mnl, &global_indices));
710   for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i;
711 
712   PetscCall(PetscFree(mnls));
713   PetscCall(PetscFree(start_indices));
714 
715   PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES));
716   PetscCall(VecAssemblyBegin(*Pv));
717   PetscCall(VecAssemblyEnd(*Pv));
718 
719   PetscCall(PetscFree(global_indices));
720   PetscFunctionReturn(0);
721 }
722