xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 8e010fa18d00c25e58c23165efbc8aef665b8a2c)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 #if !defined(PETSC_SKIP_COMPLEX)
21   #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
22 #endif
23 
24 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
25 #include <../src/ksp/pc/impls/spai/petscspai.h>
26 
27 /*
28     These are the SPAI include files
29 */
30 EXTERN_C_BEGIN
31 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
32 #include <spai.h>
33 #include <matrix.h>
34 EXTERN_C_END
35 
36 static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
37 static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
38 
39 typedef struct {
40   matrix *B;  /* matrix in SPAI format */
41   matrix *BT; /* transpose of matrix in SPAI format */
42   matrix *M;  /* the approximate inverse in SPAI format */
43 
44   Mat PM; /* the approximate inverse PETSc format */
45 
46   double epsilon;    /* tolerance */
47   int    nbsteps;    /* max number of "improvement" steps per line */
48   int    max;        /* max dimensions of is_I, q, etc. */
49   int    maxnew;     /* max number of new entries per step */
50   int    block_size; /* constant block size */
51   int    cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
52   int    verbose;    /* SPAI prints timing and statistics */
53 
54   int      sp;        /* symmetric nonzero pattern */
55   MPI_Comm comm_spai; /* communicator to be used with spai */
56 } PC_SPAI;
57 
58 static PetscErrorCode PCSetUp_SPAI(PC pc)
59 {
60   PC_SPAI *ispai = (PC_SPAI *)pc->data;
61   Mat      AT;
62 
63   PetscFunctionBegin;
64   init_SPAI();
65 
66   if (ispai->sp) {
67     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
68   } else {
69     /* Use the transpose to get the column nonzero structure. */
70     PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
71     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
72     PetscCall(MatDestroy(&AT));
73   }
74 
75   /* Destroy the transpose */
76   /* Don't know how to do it. PETSc developers? */
77 
78   /* construct SPAI preconditioner */
79   /* FILE *messages */    /* file for warning messages */
80   /* double epsilon */    /* tolerance */
81   /* int nbsteps */       /* max number of "improvement" steps per line */
82   /* int max */           /* max dimensions of is_I, q, etc. */
83   /* int maxnew */        /* max number of new entries per step */
84   /* int block_size */    /* block_size == 1 specifies scalar elements
85                               block_size == n specifies nxn constant-block elements
86                               block_size == 0 specifies variable-block elements */
87   /* int cache_size */    /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
88   /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
89                               verbose == 1 prints timing and matrix statistics */
90 
91   PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);
92 
93   PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));
94 
95   /* free the SPAI matrices */
96   sp_free_matrix(ispai->B);
97   sp_free_matrix(ispai->M);
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
102 {
103   PC_SPAI *ispai = (PC_SPAI *)pc->data;
104 
105   PetscFunctionBegin;
106   /* Now using PETSc's multiply */
107   PetscCall(MatMult(ispai->PM, xx, y));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
112 {
113   PC_SPAI *ispai = (PC_SPAI *)pc->data;
114 
115   PetscFunctionBegin;
116   /* Now using PETSc's multiply */
117   PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
118   PetscFunctionReturn(PETSC_SUCCESS);
119 }
120 
121 static PetscErrorCode PCDestroy_SPAI(PC pc)
122 {
123   PC_SPAI *ispai = (PC_SPAI *)pc->data;
124 
125   PetscFunctionBegin;
126   PetscCall(MatDestroy(&ispai->PM));
127   PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
128   PetscCall(PetscFree(pc->data));
129   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
130   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
131   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
132   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
133   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
134   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
135   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
136   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
137   PetscFunctionReturn(PETSC_SUCCESS);
138 }
139 
140 static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
141 {
142   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
143   PetscBool iascii;
144 
145   PetscFunctionBegin;
146   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
147   if (iascii) {
148     PetscCall(PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon));
149     PetscCall(PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps));
150     PetscCall(PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max));
151     PetscCall(PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew));
152     PetscCall(PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size));
153     PetscCall(PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size));
154     PetscCall(PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose));
155     PetscCall(PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp));
156   }
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
161 {
162   PC_SPAI *ispai = (PC_SPAI *)pc->data;
163 
164   PetscFunctionBegin;
165   ispai->epsilon = (double)epsilon1;
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
170 {
171   PC_SPAI *ispai = (PC_SPAI *)pc->data;
172 
173   PetscFunctionBegin;
174   ispai->nbsteps = (int)nbsteps1;
175   PetscFunctionReturn(PETSC_SUCCESS);
176 }
177 
178 /* added 1/7/99 g.h. */
179 static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
180 {
181   PC_SPAI *ispai = (PC_SPAI *)pc->data;
182 
183   PetscFunctionBegin;
184   ispai->max = (int)max1;
185   PetscFunctionReturn(PETSC_SUCCESS);
186 }
187 
188 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
189 {
190   PC_SPAI *ispai = (PC_SPAI *)pc->data;
191 
192   PetscFunctionBegin;
193   ispai->maxnew = (int)maxnew1;
194   PetscFunctionReturn(PETSC_SUCCESS);
195 }
196 
197 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
198 {
199   PC_SPAI *ispai = (PC_SPAI *)pc->data;
200 
201   PetscFunctionBegin;
202   ispai->block_size = (int)block_size1;
203   PetscFunctionReturn(PETSC_SUCCESS);
204 }
205 
206 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
207 {
208   PC_SPAI *ispai = (PC_SPAI *)pc->data;
209 
210   PetscFunctionBegin;
211   ispai->cache_size = (int)cache_size;
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
216 {
217   PC_SPAI *ispai = (PC_SPAI *)pc->data;
218 
219   PetscFunctionBegin;
220   ispai->verbose = (int)verbose;
221   PetscFunctionReturn(PETSC_SUCCESS);
222 }
223 
224 static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
225 {
226   PC_SPAI *ispai = (PC_SPAI *)pc->data;
227 
228   PetscFunctionBegin;
229   ispai->sp = (int)sp;
230   PetscFunctionReturn(PETSC_SUCCESS);
231 }
232 
233 /*@
234   PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner
235 
236   Input Parameters:
237 + pc       - the preconditioner
238 - epsilon1 - epsilon (default .4)
239 
240   Note:
241   Espilon must be between 0 and 1. It controls the
242   quality of the approximation of M to the inverse of
243   A. Higher values of epsilon lead to more work, more
244   fill, and usually better preconditioners. In many
245   cases the best choice of epsilon is the one that
246   divides the total solution time equally between the
247   preconditioner and the solver.
248 
249   Level: intermediate
250 
251 .seealso: `PCSPAI`, `PCSetType()`
252   @*/
253 PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
254 {
255   PetscFunctionBegin;
256   PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
257   PetscFunctionReturn(PETSC_SUCCESS);
258 }
259 
260 /*@
261   PCSPAISetNBSteps - set maximum number of improvement steps per row in
262   the `PCSPAI` preconditioner
263 
264   Input Parameters:
265 + pc       - the preconditioner
266 - nbsteps1 - number of steps (default 5)
267 
268   Note:
269   `PCSPAI` constructs to approximation to every column of
270   the exact inverse of A in a series of improvement
271   steps. The quality of the approximation is determined
272   by epsilon. If an approximation achieving an accuracy
273   of epsilon is not obtained after ns steps, SPAI simply
274   uses the best approximation constructed so far.
275 
276   Level: intermediate
277 
278 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
279 @*/
280 PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
281 {
282   PetscFunctionBegin;
283   PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 /* added 1/7/99 g.h. */
288 /*@
289   PCSPAISetMax - set the size of various working buffers in
290   the `PCSPAI` preconditioner
291 
292   Input Parameters:
293 + pc   - the preconditioner
294 - max1 - size (default is 5000)
295 
296   Level: intermediate
297 
298 .seealso: `PCSPAI`, `PCSetType()`
299 @*/
300 PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
301 {
302   PetscFunctionBegin;
303   PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
304   PetscFunctionReturn(PETSC_SUCCESS);
305 }
306 
307 /*@
308   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
309   in `PCSPAI` preconditioner
310 
311   Input Parameters:
312 + pc      - the preconditioner
313 - maxnew1 - maximum number (default 5)
314 
315   Level: intermediate
316 
317 .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
318 @*/
319 PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
320 {
321   PetscFunctionBegin;
322   PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 /*@
327   PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner
328 
329   Input Parameters:
330 + pc          - the preconditioner
331 - block_size1 - block size (default 1)
332 
333   Notes:
334   A block
335   size of 1 treats A as a matrix of scalar elements. A
336   block size of s > 1 treats A as a matrix of sxs
337   blocks. A block size of 0 treats A as a matrix with
338   variable sized blocks, which are determined by
339   searching for dense square diagonal blocks in A.
340   This can be very effective for finite-element
341   matrices.
342 
343   SPAI will convert A to block form, use a block
344   version of the preconditioner algorithm, and then
345   convert the result back to scalar form.
346 
347   In many cases the a block-size parameter other than 1
348   can lead to very significant improvement in
349   performance.
350 
351   Level: intermediate
352 
353 .seealso: `PCSPAI`, `PCSetType()`
354 @*/
355 PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
356 {
357   PetscFunctionBegin;
358   PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*@
363   PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner
364 
365   Input Parameters:
366 + pc         - the preconditioner
367 - cache_size - cache size {0,1,2,3,4,5} (default 5)
368 
369   Note:
370   `PCSPAI` uses a hash table to cache messages and avoid
371   redundant communication. If suggest always using
372   5. This parameter is irrelevant in the serial
373   version.
374 
375   Level: intermediate
376 
377 .seealso: `PCSPAI`, `PCSetType()`
378 @*/
379 PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
380 {
381   PetscFunctionBegin;
382   PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 /*@
387   PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner
388 
389   Input Parameters:
390 + pc      - the preconditioner
391 - verbose - level (default 1)
392 
393   Note:
394   print parameters, timings and matrix statistics
395 
396   Level: intermediate
397 
398 .seealso: `PCSPAI`, `PCSetType()`
399 @*/
400 PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
401 {
402   PetscFunctionBegin;
403   PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
404   PetscFunctionReturn(PETSC_SUCCESS);
405 }
406 
407 /*@
408   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner
409 
410   Input Parameters:
411 + pc - the preconditioner
412 - sp - 0 or 1
413 
414   Note:
415   If A has a symmetric nonzero pattern use -sp 1 to
416   improve performance by eliminating some communication
417   in the parallel version. Even if A does not have a
418   symmetric nonzero pattern -sp 1 may well lead to good
419   results, but the code will not follow the published
420   SPAI algorithm exactly.
421 
422   Level: intermediate
423 
424 .seealso: `PCSPAI`, `PCSetType()`
425 @*/
426 PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
427 {
428   PetscFunctionBegin;
429   PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
430   PetscFunctionReturn(PETSC_SUCCESS);
431 }
432 
433 static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
434 {
435   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
436   int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
437   double    epsilon1;
438   PetscBool flg;
439 
440   PetscFunctionBegin;
441   PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
442   PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
443   if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
444   PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
445   if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
446   /* added 1/7/99 g.h. */
447   PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
448   if (flg) PetscCall(PCSPAISetMax(pc, max1));
449   PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
450   if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
451   PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
452   if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
453   PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
454   if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
455   PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
456   if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
457   PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
458   if (flg) PetscCall(PCSPAISetSp(pc, sp));
459   PetscOptionsHeadEnd();
460   PetscFunctionReturn(PETSC_SUCCESS);
461 }
462 
463 /*MC
464    PCSPAI - Use the Sparse Approximate Inverse method
465 
466    Options Database Keys:
467 +  -pc_spai_epsilon <eps> - set tolerance
468 .  -pc_spai_nbstep <n> - set nbsteps
469 .  -pc_spai_max <m> - set max
470 .  -pc_spai_max_new <m> - set maxnew
471 .  -pc_spai_block_size <n> - set block size
472 .  -pc_spai_cache_size <n> - set cache size
473 .  -pc_spai_sp <m> - set sp
474 -  -pc_spai_set_verbose <true,false> - verbose output
475 
476    Level: beginner
477 
478    Note:
479     This only works with `MATAIJ` matrices.
480 
481    References:
482  . * -  Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)
483 
484 .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
485           `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
486           `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
487 M*/
488 
489 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
490 {
491   PC_SPAI *ispai;
492 
493   PetscFunctionBegin;
494   PetscCall(PetscNew(&ispai));
495   pc->data = ispai;
496 
497   pc->ops->destroy         = PCDestroy_SPAI;
498   pc->ops->apply           = PCApply_SPAI;
499   pc->ops->matapply        = PCMatApply_SPAI;
500   pc->ops->applyrichardson = 0;
501   pc->ops->setup           = PCSetUp_SPAI;
502   pc->ops->view            = PCView_SPAI;
503   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
504 
505   ispai->epsilon    = .4;
506   ispai->nbsteps    = 5;
507   ispai->max        = 5000;
508   ispai->maxnew     = 5;
509   ispai->block_size = 1;
510   ispai->cache_size = 5;
511   ispai->verbose    = 0;
512 
513   ispai->sp = 1;
514   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));
515 
516   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
517   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
518   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
519   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
520   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
521   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
522   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
523   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
524   PetscFunctionReturn(PETSC_SUCCESS);
525 }
526 
527 /*
528    Converts from a PETSc matrix to an SPAI matrix
529 */
530 static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
531 {
532   matrix                  *M;
533   int                      i, j, col;
534   int                      row_indx;
535   int                      len, pe, local_indx, start_indx;
536   int                     *mapping;
537   const int               *cols;
538   const double            *vals;
539   int                      n, mnl, nnl, nz, rstart, rend;
540   PetscMPIInt              size, rank;
541   struct compressed_lines *rows;
542 
543   PetscFunctionBegin;
544   PetscCallMPI(MPI_Comm_size(comm, &size));
545   PetscCallMPI(MPI_Comm_rank(comm, &rank));
546   PetscCall(MatGetSize(A, &n, &n));
547   PetscCall(MatGetLocalSize(A, &mnl, &nnl));
548 
549   /*
550     not sure why a barrier is required. commenting out
551   PetscCallMPI(MPI_Barrier(comm));
552   */
553 
554   M = new_matrix((SPAI_Comm)comm);
555 
556   M->n              = n;
557   M->bs             = 1;
558   M->max_block_size = 1;
559 
560   M->mnls          = (int *)malloc(sizeof(int) * size);
561   M->start_indices = (int *)malloc(sizeof(int) * size);
562   M->pe            = (int *)malloc(sizeof(int) * n);
563   M->block_sizes   = (int *)malloc(sizeof(int) * n);
564   for (i = 0; i < n; i++) M->block_sizes[i] = 1;
565 
566   PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));
567 
568   M->start_indices[0] = 0;
569   for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];
570 
571   M->mnl            = M->mnls[M->myid];
572   M->my_start_index = M->start_indices[M->myid];
573 
574   for (i = 0; i < size; i++) {
575     start_indx = M->start_indices[i];
576     for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
577   }
578 
579   if (AT) {
580     M->lines = new_compressed_lines(M->mnls[rank], 1);
581   } else {
582     M->lines = new_compressed_lines(M->mnls[rank], 0);
583   }
584 
585   rows = M->lines;
586 
587   /* Determine the mapping from global indices to pointers */
588   PetscCall(PetscMalloc1(M->n, &mapping));
589   pe         = 0;
590   local_indx = 0;
591   for (i = 0; i < M->n; i++) {
592     if (local_indx >= M->mnls[pe]) {
593       pe++;
594       local_indx = 0;
595     }
596     mapping[i] = local_indx + M->start_indices[pe];
597     local_indx++;
598   }
599 
600   /************** Set up the row structure *****************/
601 
602   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
603   for (i = rstart; i < rend; i++) {
604     row_indx = i - rstart;
605     PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
606     /* allocate buffers */
607     rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
608     rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
609     /* copy the matrix */
610     for (j = 0; j < nz; j++) {
611       col = cols[j];
612       len = rows->len[row_indx]++;
613 
614       rows->ptrs[row_indx][len] = mapping[col];
615       rows->A[row_indx][len]    = vals[j];
616     }
617     rows->slen[row_indx] = rows->len[row_indx];
618 
619     PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
620   }
621 
622   /************** Set up the column structure *****************/
623 
624   if (AT) {
625     for (i = rstart; i < rend; i++) {
626       row_indx = i - rstart;
627       PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
628       /* allocate buffers */
629       rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
630       /* copy the matrix (i.e., the structure) */
631       for (j = 0; j < nz; j++) {
632         col = cols[j];
633         len = rows->rlen[row_indx]++;
634 
635         rows->rptrs[row_indx][len] = mapping[col];
636       }
637       PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
638     }
639   }
640 
641   PetscCall(PetscFree(mapping));
642 
643   order_pointers(M);
644   M->maxnz = calc_maxnz(M);
645   *B       = M;
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
649 /*
650    Converts from an SPAI matrix B  to a PETSc matrix PB.
651    This assumes that the SPAI matrix B is stored in
652    COMPRESSED-ROW format.
653 */
654 static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
655 {
656   PetscMPIInt size, rank;
657   int         m, n, M, N;
658   int         d_nz, o_nz;
659   int        *d_nnz, *o_nnz;
660   int         i, k, global_row, global_col, first_diag_col, last_diag_col;
661   PetscScalar val;
662 
663   PetscFunctionBegin;
664   PetscCallMPI(MPI_Comm_size(comm, &size));
665   PetscCallMPI(MPI_Comm_rank(comm, &rank));
666 
667   m = n = B->mnls[rank];
668   d_nz = o_nz = 0;
669 
670   /* Determine preallocation for MatCreateAIJ */
671   PetscCall(PetscMalloc1(m, &d_nnz));
672   PetscCall(PetscMalloc1(m, &o_nnz));
673   for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
674   first_diag_col = B->start_indices[rank];
675   last_diag_col  = first_diag_col + B->mnls[rank];
676   for (i = 0; i < B->mnls[rank]; i++) {
677     for (k = 0; k < B->lines->len[i]; k++) {
678       global_col = B->lines->ptrs[i][k];
679       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
680       else o_nnz[i]++;
681     }
682   }
683 
684   M = N = B->n;
685   /* Here we only know how to create AIJ format */
686   PetscCall(MatCreate(comm, PB));
687   PetscCall(MatSetSizes(*PB, m, n, M, N));
688   PetscCall(MatSetType(*PB, MATAIJ));
689   PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
690   PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));
691 
692   for (i = 0; i < B->mnls[rank]; i++) {
693     global_row = B->start_indices[rank] + i;
694     for (k = 0; k < B->lines->len[i]; k++) {
695       global_col = B->lines->ptrs[i][k];
696 
697       val = B->lines->A[i][k];
698       PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
699     }
700   }
701 
702   PetscCall(PetscFree(d_nnz));
703   PetscCall(PetscFree(o_nnz));
704 
705   PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
706   PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
707   PetscFunctionReturn(PETSC_SUCCESS);
708 }
709