xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 7d5fd1e4d9337468ad3f05b65b7facdcd2dfd2a4)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 
22 #include <petsc/private/pcimpl.h>        /*I "petscpc.h" I*/
23 #include <../src/ksp/pc/impls/spai/petscspai.h>
24 
25 /*
26     These are the SPAI include files
27 */
28 EXTERN_C_BEGIN
29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30 #include <spai.h>
31 #include <matrix.h>
32 EXTERN_C_END
33 
34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
38 
39 typedef struct {
40 
41   matrix *B;                /* matrix in SPAI format */
42   matrix *BT;               /* transpose of matrix in SPAI format */
43   matrix *M;                /* the approximate inverse in SPAI format */
44 
45   Mat PM;                   /* the approximate inverse PETSc format */
46 
47   double epsilon;           /* tolerance */
48   int    nbsteps;           /* max number of "improvement" steps per line */
49   int    max;               /* max dimensions of is_I, q, etc. */
50   int    maxnew;            /* max number of new entries per step */
51   int    block_size;        /* constant block size */
52   int    cache_size;        /* one of (1,2,3,4,5,6) indicting size of cache */
53   int    verbose;           /* SPAI prints timing and statistics */
54 
55   int      sp;              /* symmetric nonzero pattern */
56   MPI_Comm comm_spai;     /* communicator to be used with spai */
57 } PC_SPAI;
58 
59 /**********************************************************************/
60 
/*
   PCSetUp_SPAI - Builds the SPAI sparse approximate inverse for pc->pmat.

   Converts the PETSc preconditioning matrix to SPAI's internal "matrix"
   format (using an explicit transpose for the column nonzero structure
   unless the sp flag says the pattern is symmetric), runs bspai() to
   compute the approximate inverse M, then converts M back into the PETSc
   matrix ispai->PM that PCApply_SPAI() uses.
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  Mat            AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix itself supplies the column structure */
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
  } else {
    /* Use the transpose to get the column nonzero structure. */
    ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
    ierr = MatDestroy(&AT);CHKERRQ(ierr);
  }

  /* construct SPAI preconditioner; bspai() parameter meanings: */
  /* FILE *messages */     /* file for warning messages */
  /* double epsilon */     /* tolerance */
  /* int nbsteps */        /* max number of "improvement" steps per line */
  /* int max */            /* max dimensions of is_I, q, etc. */
  /* int maxnew */         /* max number of new entries per step */
  /* int block_size */     /* block_size == 1 specifies scalar elments
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
  /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  ierr = bspai(ispai->B,&ispai->M,
               stdout,
               ispai->epsilon,
               ispai->nbsteps,
               ispai->max,
               ispai->maxnew,
               ispai->block_size,
               ispai->cache_size,
               ispai->verbose);CHKERRQ(ierr);

  ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);

  /* free the SPAI matrices; the preconditioner lives on only as ispai->PM */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(0);
}
112 
113 /**********************************************************************/
114 
115 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
116 {
117   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
118   PetscErrorCode ierr;
119 
120   PetscFunctionBegin;
121   /* Now using PETSc's multiply */
122   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
123   PetscFunctionReturn(0);
124 }
125 
126 static PetscErrorCode PCMatApply_SPAI(PC pc,Mat X,Mat Y)
127 {
128   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
129   PetscErrorCode ierr;
130 
131   PetscFunctionBegin;
132   /* Now using PETSc's multiply */
133   ierr = MatMatMult(ispai->PM,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 /**********************************************************************/
138 
139 static PetscErrorCode PCDestroy_SPAI(PC pc)
140 {
141   PetscErrorCode ierr;
142   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
143 
144   PetscFunctionBegin;
145   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
146   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRMPI(ierr);
147   ierr = PetscFree(pc->data);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 /**********************************************************************/
152 
153 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
154 {
155   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
156   PetscErrorCode ierr;
157   PetscBool      iascii;
158 
159   PetscFunctionBegin;
160   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
161   if (iascii) {
162     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %g\n",   (double)ispai->epsilon);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
169     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 static PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
175 {
176   PC_SPAI *ispai = (PC_SPAI*)pc->data;
177 
178   PetscFunctionBegin;
179   ispai->epsilon = epsilon1;
180   PetscFunctionReturn(0);
181 }
182 
183 /**********************************************************************/
184 
185 static PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
186 {
187   PC_SPAI *ispai = (PC_SPAI*)pc->data;
188 
189   PetscFunctionBegin;
190   ispai->nbsteps = nbsteps1;
191   PetscFunctionReturn(0);
192 }
193 
194 /**********************************************************************/
195 
196 /* added 1/7/99 g.h. */
197 static PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
198 {
199   PC_SPAI *ispai = (PC_SPAI*)pc->data;
200 
201   PetscFunctionBegin;
202   ispai->max = max1;
203   PetscFunctionReturn(0);
204 }
205 
206 /**********************************************************************/
207 
208 static PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
209 {
210   PC_SPAI *ispai = (PC_SPAI*)pc->data;
211 
212   PetscFunctionBegin;
213   ispai->maxnew = maxnew1;
214   PetscFunctionReturn(0);
215 }
216 
217 /**********************************************************************/
218 
219 static PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
220 {
221   PC_SPAI *ispai = (PC_SPAI*)pc->data;
222 
223   PetscFunctionBegin;
224   ispai->block_size = block_size1;
225   PetscFunctionReturn(0);
226 }
227 
228 /**********************************************************************/
229 
230 static PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
231 {
232   PC_SPAI *ispai = (PC_SPAI*)pc->data;
233 
234   PetscFunctionBegin;
235   ispai->cache_size = cache_size;
236   PetscFunctionReturn(0);
237 }
238 
239 /**********************************************************************/
240 
241 static PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
242 {
243   PC_SPAI *ispai = (PC_SPAI*)pc->data;
244 
245   PetscFunctionBegin;
246   ispai->verbose = verbose;
247   PetscFunctionReturn(0);
248 }
249 
250 /**********************************************************************/
251 
252 static PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
253 {
254   PC_SPAI *ispai = (PC_SPAI*)pc->data;
255 
256   PetscFunctionBegin;
257   ispai->sp = sp;
258   PetscFunctionReturn(0);
259 }
260 
261 /* -------------------------------------------------------------------*/
262 
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- epsilon1 - epsilon (default .4)

  Notes:
    Epsilon must be between 0 and 1. It controls the quality of the
    approximation of M to the inverse of A. Higher values of epsilon
    lead to more work, more fill, and usually better preconditioners.
    In many cases the best choice of epsilon is the one that divides
    the total solution time equally between the preconditioner and the
    solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
291 
292 /**********************************************************************/
293 
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- nbsteps1 - number of steps (default 5)

  Notes:
    SPAI constructs an approximation to every column of the exact
    inverse of A in a series of improvement steps. The quality of the
    approximation is determined by epsilon. If an approximation
    achieving an accuracy of epsilon is not obtained after nbsteps1
    steps, SPAI simply uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
322 
323 /**********************************************************************/
324 
325 /* added 1/7/99 g.h. */
326 /*@
327   PCSPAISetMax - set the size of various working buffers in
328         the SPAI preconditioner
329 
330   Input Parameters:
331 + pc - the preconditioner
332 - n - size (default is 5000)
333 
334   Level: intermediate
335 
336 .seealso: PCSPAI, PCSetType()
337 @*/
338 PetscErrorCode  PCSPAISetMax(PC pc,int max1)
339 {
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 /**********************************************************************/
348 
349 /*@
350   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
351    in SPAI preconditioner
352 
353   Input Parameters:
354 + pc - the preconditioner
355 - n - maximum number (default 5)
356 
357   Level: intermediate
358 
359 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
360 @*/
361 PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
362 {
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 /**********************************************************************/
371 
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- block_size1 - block size (default 1)

  Notes:
    A block size of 1 treats A as a matrix of scalar elements. A block
    size of s > 1 treats A as a matrix of sxs blocks. A block size of 0
    treats A as a matrix with variable sized blocks, which are
    determined by searching for dense square diagonal blocks in A.
    This can be very effective for finite-element matrices.

    SPAI will convert A to block form, use a block version of the
    preconditioner algorithm, and then convert the result back to
    scalar form.

    In many cases a block-size parameter other than 1 can lead to very
    significant improvement in performance.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
409 
410 /**********************************************************************/
411 
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- cache_size -  cache size {0,1,2,3,4,5} (default 5)

  Notes:
    SPAI uses a hash table to cache messages and avoid redundant
    communication. We suggest always using 5. This parameter is
    irrelevant in the serial version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
437 
438 /**********************************************************************/
439 
440 /*@
441   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
442 
443   Input Parameters:
444 + pc - the preconditioner
445 - n - level (default 1)
446 
447   Notes:
448     print parameters, timings and matrix statistics
449 
450   Level: intermediate
451 
452 .seealso: PCSPAI, PCSetType()
453 @*/
454 PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
455 {
456   PetscErrorCode ierr;
457 
458   PetscFunctionBegin;
459   ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 
463 /**********************************************************************/
464 
465 /*@
466   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
467 
468   Input Parameters:
469 + pc - the preconditioner
470 - n - 0 or 1
471 
472   Notes:
473     If A has a symmetric nonzero pattern use -sp 1 to
474                  improve performance by eliminating some communication
475                  in the parallel version. Even if A does not have a
476                  symmetric nonzero pattern -sp 1 may well lead to good
477                  results, but the code will not follow the published
478                  SPAI algorithm exactly.
479 
480   Level: intermediate
481 
482 .seealso: PCSPAI, PCSetType()
483 @*/
484 PetscErrorCode  PCSPAISetSp(PC pc,int sp)
485 {
486   PetscErrorCode ierr;
487 
488   PetscFunctionBegin;
489   ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 /**********************************************************************/
494 
495 /**********************************************************************/
496 
/*
   PCSetFromOptions_SPAI - Reads the SPAI options from the options database
   and forwards each one that was actually set to the corresponding
   PCSPAISetXxx() interface (which dispatches via PetscTryMethod).
*/
static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
  double         epsilon1;
  PetscBool      flg;   /* set by each query; only forward values the user supplied */

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr);
  ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
  }
  /* added 1/7/99 g.h. */
  ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
543 
544 /**********************************************************************/
545 
546 /*MC
547    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
548      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
549 
550    Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <true,false> - verbose output
559 
560    Notes:
561     This only works with AIJ matrices.
562 
563    Level: beginner
564 
565 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
566     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
567     PCSPAISetVerbose(), PCSPAISetSp()
568 M*/
569 
/*
   PCCreate_SPAI - Creates the PC_SPAI context, installs the PC method
   table, sets the default SPAI parameters, duplicates the PC's
   communicator for SPAI's use, and registers the PCSPAISetXxx_C
   dispatch methods.
*/
PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
{
  PC_SPAI        *ispai;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr     = PetscNewLog(pc,&ispai);CHKERRQ(ierr);
  pc->data = ispai;

  pc->ops->destroy         = PCDestroy_SPAI;
  pc->ops->apply           = PCApply_SPAI;
  pc->ops->matapply        = PCMatApply_SPAI;
  pc->ops->applyrichardson = 0;
  pc->ops->setup           = PCSetUp_SPAI;
  pc->ops->view            = PCView_SPAI;
  pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

  /* defaults documented in the PCSPAISetXxx() man pages */
  ispai->epsilon    = .4;
  ispai->nbsteps    = 5;
  ispai->max        = 5000;
  ispai->maxnew     = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose    = 0;

  ispai->sp = 1;
  /* SPAI gets its own communicator; freed in PCDestroy_SPAI() */
  ierr      = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRMPI(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
608 
609 /**********************************************************************/
610 
611 /*
612    Converts from a PETSc matrix to an SPAI matrix
613 */
614 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
615 {
616   matrix                  *M;
617   int                     i,j,col;
618   int                     row_indx;
619   int                     len,pe,local_indx,start_indx;
620   int                     *mapping;
621   PetscErrorCode          ierr;
622   const int               *cols;
623   const double            *vals;
624   int                     n,mnl,nnl,nz,rstart,rend;
625   PetscMPIInt             size,rank;
626   struct compressed_lines *rows;
627 
628   PetscFunctionBegin;
629   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
630   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
631   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
632   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
633 
634   /*
635     not sure why a barrier is required. commenting out
636   ierr = MPI_Barrier(comm);CHKERRMPI(ierr);
637   */
638 
639   M = new_matrix((SPAI_Comm)comm);
640 
641   M->n              = n;
642   M->bs             = 1;
643   M->max_block_size = 1;
644 
645   M->mnls          = (int*)malloc(sizeof(int)*size);
646   M->start_indices = (int*)malloc(sizeof(int)*size);
647   M->pe            = (int*)malloc(sizeof(int)*n);
648   M->block_sizes   = (int*)malloc(sizeof(int)*n);
649   for (i=0; i<n; i++) M->block_sizes[i] = 1;
650 
651   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRMPI(ierr);
652 
653   M->start_indices[0] = 0;
654   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
655 
656   M->mnl            = M->mnls[M->myid];
657   M->my_start_index = M->start_indices[M->myid];
658 
659   for (i=0; i<size; i++) {
660     start_indx = M->start_indices[i];
661     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
662   }
663 
664   if (AT) {
665     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
666   } else {
667     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
668   }
669 
670   rows = M->lines;
671 
672   /* Determine the mapping from global indices to pointers */
673   ierr       = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr);
674   pe         = 0;
675   local_indx = 0;
676   for (i=0; i<M->n; i++) {
677     if (local_indx >= M->mnls[pe]) {
678       pe++;
679       local_indx = 0;
680     }
681     mapping[i] = local_indx + M->start_indices[pe];
682     local_indx++;
683   }
684 
685   /*********************************************************/
686   /************** Set up the row structure *****************/
687   /*********************************************************/
688 
689   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
690   for (i=rstart; i<rend; i++) {
691     row_indx = i - rstart;
692     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
693     /* allocate buffers */
694     rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int));
695     rows->A[row_indx]    = (double*)malloc(nz*sizeof(double));
696     /* copy the matrix */
697     for (j=0; j<nz; j++) {
698       col = cols[j];
699       len = rows->len[row_indx]++;
700 
701       rows->ptrs[row_indx][len] = mapping[col];
702       rows->A[row_indx][len]    = vals[j];
703     }
704     rows->slen[row_indx] = rows->len[row_indx];
705 
706     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
707   }
708 
709   /************************************************************/
710   /************** Set up the column structure *****************/
711   /*********************************************************/
712 
713   if (AT) {
714 
715     for (i=rstart; i<rend; i++) {
716       row_indx = i - rstart;
717       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
718       /* allocate buffers */
719       rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int));
720       /* copy the matrix (i.e., the structure) */
721       for (j=0; j<nz; j++) {
722         col = cols[j];
723         len = rows->rlen[row_indx]++;
724 
725         rows->rptrs[row_indx][len] = mapping[col];
726       }
727       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
728     }
729   }
730 
731   ierr = PetscFree(mapping);CHKERRQ(ierr);
732 
733   order_pointers(M);
734   M->maxnz = calc_maxnz(M);
735   *B       = M;
736   PetscFunctionReturn(0);
737 }
738 
739 /**********************************************************************/
740 
741 /*
742    Converts from an SPAI matrix B  to a PETSc matrix PB.
743    This assumes that the SPAI matrix B is stored in
744    COMPRESSED-ROW format.
745 */
746 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
747 {
748   PetscMPIInt    size,rank;
749   PetscErrorCode ierr;
750   int            m,n,M,N;
751   int            d_nz,o_nz;
752   int            *d_nnz,*o_nnz;
753   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
754   PetscScalar    val;
755 
756   PetscFunctionBegin;
757   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
758   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
759 
760   m    = n = B->mnls[rank];
761   d_nz = o_nz = 0;
762 
763   /* Determine preallocation for MatCreateAIJ */
764   ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr);
765   ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr);
766   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
767   first_diag_col = B->start_indices[rank];
768   last_diag_col  = first_diag_col + B->mnls[rank];
769   for (i=0; i<B->mnls[rank]; i++) {
770     for (k=0; k<B->lines->len[i]; k++) {
771       global_col = B->lines->ptrs[i][k];
772       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
773       else o_nnz[i]++;
774     }
775   }
776 
777   M = N = B->n;
778   /* Here we only know how to create AIJ format */
779   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
780   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
781   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
782   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
783   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
784 
785   for (i=0; i<B->mnls[rank]; i++) {
786     global_row = B->start_indices[rank]+i;
787     for (k=0; k<B->lines->len[i]; k++) {
788       global_col = B->lines->ptrs[i][k];
789 
790       val  = B->lines->A[i][k];
791       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
792     }
793   }
794 
795   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
796   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
797 
798   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
799   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
800   PetscFunctionReturn(0);
801 }
802 
803 /**********************************************************************/
804 
805 /*
806    Converts from an SPAI vector v  to a PETSc vec Pv.
807 */
808 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
809 {
810   PetscErrorCode ierr;
811   PetscMPIInt    size,rank;
812   int            m,M,i,*mnls,*start_indices,*global_indices;
813 
814   PetscFunctionBegin;
815   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
816   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
817 
818   m = v->mnl;
819   M = v->n;
820 
821   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
822 
823   ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr);
824   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRMPI(ierr);
825 
826   ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr);
827 
828   start_indices[0] = 0;
829   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
830 
831   ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr);
832   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
833 
834   ierr = PetscFree(mnls);CHKERRQ(ierr);
835   ierr = PetscFree(start_indices);CHKERRQ(ierr);
836 
837   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
838   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
839   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
840 
841   ierr = PetscFree(global_indices);CHKERRQ(ierr);
842   PetscFunctionReturn(0);
843 }
844