xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 
22 #include <petsc/private/pcimpl.h>        /*I "petscpc.h" I*/
23 #include <../src/ksp/pc/impls/spai/petscspai.h>
24 
25 /*
26     These are the SPAI include files
27 */
28 EXTERN_C_BEGIN
29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30 #include <spai.h>
31 #include <matrix.h>
32 EXTERN_C_END
33 
34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
38 
typedef struct {

  matrix *B;                /* matrix in SPAI format */
  matrix *BT;               /* transpose of matrix in SPAI format (never assigned in this file) */
  matrix *M;                /* the approximate inverse in SPAI format */

  Mat PM;                   /* the approximate inverse PETSc format */

  double epsilon;           /* tolerance */
  int    nbsteps;           /* max number of "improvement" steps per line */
  int    max;               /* max dimensions of is_I, q, etc. */
  int    maxnew;            /* max number of new entries per step */
  int    block_size;        /* constant block size */
  int    cache_size;        /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;           /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;     /* communicator to be used with spai (duplicated in PCCreate_SPAI, freed in PCDestroy_SPAI) */
} PC_SPAI;
58 
59 /**********************************************************************/
60 
61 static PetscErrorCode PCSetUp_SPAI(PC pc)
62 {
63   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
64   PetscErrorCode ierr;
65   Mat            AT;
66 
67   PetscFunctionBegin;
68   init_SPAI();
69 
70   if (ispai->sp) {
71     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
72   } else {
73     /* Use the transpose to get the column nonzero structure. */
74     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
75     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
76     ierr = MatDestroy(&AT);CHKERRQ(ierr);
77   }
78 
79   /* Destroy the transpose */
80   /* Don't know how to do it. PETSc developers? */
81 
82   /* construct SPAI preconditioner */
83   /* FILE *messages */     /* file for warning messages */
84   /* double epsilon */     /* tolerance */
85   /* int nbsteps */        /* max number of "improvement" steps per line */
86   /* int max */            /* max dimensions of is_I, q, etc. */
87   /* int maxnew */         /* max number of new entries per step */
88   /* int block_size */     /* block_size == 1 specifies scalar elments
89                               block_size == n specifies nxn constant-block elements
90                               block_size == 0 specifies variable-block elements */
91   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
92   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
93                               verbose == 1 prints timing and matrix statistics */
94 
95   ierr = bspai(ispai->B,&ispai->M,
96                stdout,
97                ispai->epsilon,
98                ispai->nbsteps,
99                ispai->max,
100                ispai->maxnew,
101                ispai->block_size,
102                ispai->cache_size,
103                ispai->verbose);CHKERRQ(ierr);
104 
105   ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);
106 
107   /* free the SPAI matrices */
108   sp_free_matrix(ispai->B);
109   sp_free_matrix(ispai->M);
110   PetscFunctionReturn(0);
111 }
112 
113 /**********************************************************************/
114 
115 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
116 {
117   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
118   PetscErrorCode ierr;
119 
120   PetscFunctionBegin;
121   /* Now using PETSc's multiply */
122   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
123   PetscFunctionReturn(0);
124 }
125 
126 static PetscErrorCode PCMatApply_SPAI(PC pc,Mat X,Mat Y)
127 {
128   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
129   PetscErrorCode ierr;
130 
131   PetscFunctionBegin;
132   /* Now using PETSc's multiply */
133   ierr = MatMatMult(ispai->PM,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
134   PetscFunctionReturn(0);
135 }
136 
137 /**********************************************************************/
138 
139 static PetscErrorCode PCDestroy_SPAI(PC pc)
140 {
141   PetscErrorCode ierr;
142   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
143 
144   PetscFunctionBegin;
145   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
146   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRMPI(ierr);
147   ierr = PetscFree(pc->data);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 /**********************************************************************/
152 
153 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
154 {
155   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
156   PetscErrorCode ierr;
157   PetscBool      iascii;
158 
159   PetscFunctionBegin;
160   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
161   if (iascii) {
162     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %g\n",   (double)ispai->epsilon);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
169     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 static PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
175 {
176   PC_SPAI *ispai = (PC_SPAI*)pc->data;
177 
178   PetscFunctionBegin;
179   ispai->epsilon = epsilon1;
180   PetscFunctionReturn(0);
181 }
182 
183 /**********************************************************************/
184 
185 static PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
186 {
187   PC_SPAI *ispai = (PC_SPAI*)pc->data;
188 
189   PetscFunctionBegin;
190   ispai->nbsteps = nbsteps1;
191   PetscFunctionReturn(0);
192 }
193 
194 /**********************************************************************/
195 
196 /* added 1/7/99 g.h. */
197 static PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
198 {
199   PC_SPAI *ispai = (PC_SPAI*)pc->data;
200 
201   PetscFunctionBegin;
202   ispai->max = max1;
203   PetscFunctionReturn(0);
204 }
205 
206 /**********************************************************************/
207 
208 static PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
209 {
210   PC_SPAI *ispai = (PC_SPAI*)pc->data;
211 
212   PetscFunctionBegin;
213   ispai->maxnew = maxnew1;
214   PetscFunctionReturn(0);
215 }
216 
217 /**********************************************************************/
218 
219 static PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
220 {
221   PC_SPAI *ispai = (PC_SPAI*)pc->data;
222 
223   PetscFunctionBegin;
224   ispai->block_size = block_size1;
225   PetscFunctionReturn(0);
226 }
227 
228 /**********************************************************************/
229 
230 static PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
231 {
232   PC_SPAI *ispai = (PC_SPAI*)pc->data;
233 
234   PetscFunctionBegin;
235   ispai->cache_size = cache_size;
236   PetscFunctionReturn(0);
237 }
238 
239 /**********************************************************************/
240 
241 static PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
242 {
243   PC_SPAI *ispai = (PC_SPAI*)pc->data;
244 
245   PetscFunctionBegin;
246   ispai->verbose = verbose;
247   PetscFunctionReturn(0);
248 }
249 
250 /**********************************************************************/
251 
252 static PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
253 {
254   PC_SPAI *ispai = (PC_SPAI*)pc->data;
255 
256   PetscFunctionBegin;
257   ispai->sp = sp;
258   PetscFunctionReturn(0);
259 }
260 
261 /* -------------------------------------------------------------------*/
262 
263 /*@
264   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
265 
266   Input Parameters:
267 + pc - the preconditioner
268 - eps - epsilon (default .4)
269 
270   Notes:
    Epsilon must be between 0 and 1. It controls the
272                  quality of the approximation of M to the inverse of
273                  A. Higher values of epsilon lead to more work, more
274                  fill, and usually better preconditioners. In many
275                  cases the best choice of epsilon is the one that
276                  divides the total solution time equally between the
277                  preconditioner and the solver.
278 
279   Level: intermediate
280 
281 .seealso: PCSPAI, PCSetType()
282   @*/
283 PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
284 {
285   PetscErrorCode ierr;
286 
287   PetscFunctionBegin;
288   ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 /**********************************************************************/
293 
294 /*@
295   PCSPAISetNBSteps - set maximum number of improvement steps per row in
296         the SPAI preconditioner
297 
298   Input Parameters:
299 + pc - the preconditioner
300 - n - number of steps (default 5)
301 
302   Notes:
    SPAI constructs an approximation to every column of
304                  the exact inverse of A in a series of improvement
305                  steps. The quality of the approximation is determined
306                  by epsilon. If an approximation achieving an accuracy
307                  of epsilon is not obtained after ns steps, SPAI simply
308                  uses the best approximation constructed so far.
309 
310   Level: intermediate
311 
312 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
313 @*/
314 PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
315 {
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /**********************************************************************/
324 
325 /* added 1/7/99 g.h. */
326 /*@
327   PCSPAISetMax - set the size of various working buffers in
328         the SPAI preconditioner
329 
330   Input Parameters:
331 + pc - the preconditioner
332 - n - size (default is 5000)
333 
334   Level: intermediate
335 
336 .seealso: PCSPAI, PCSetType()
337 @*/
338 PetscErrorCode  PCSPAISetMax(PC pc,int max1)
339 {
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 /**********************************************************************/
348 
349 /*@
350   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
351    in SPAI preconditioner
352 
353   Input Parameters:
354 + pc - the preconditioner
355 - n - maximum number (default 5)
356 
357   Level: intermediate
358 
359 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
360 @*/
361 PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
362 {
363   PetscErrorCode ierr;
364 
365   PetscFunctionBegin;
366   ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 /**********************************************************************/
371 
372 /*@
373   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
374 
375   Input Parameters:
376 + pc - the preconditioner
377 - n - block size (default 1)
378 
379   Notes:
380     A block
381                  size of 1 treats A as a matrix of scalar elements. A
382                  block size of s > 1 treats A as a matrix of sxs
383                  blocks. A block size of 0 treats A as a matrix with
384                  variable sized blocks, which are determined by
385                  searching for dense square diagonal blocks in A.
386                  This can be very effective for finite-element
387                  matrices.
388 
389                  SPAI will convert A to block form, use a block
390                  version of the preconditioner algorithm, and then
391                  convert the result back to scalar form.
392 
                 In many cases a block-size parameter other than 1
394                  can lead to very significant improvement in
395                  performance.
396 
397 
398   Level: intermediate
399 
400 .seealso: PCSPAI, PCSetType()
401 @*/
402 PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
403 {
404   PetscErrorCode ierr;
405 
406   PetscFunctionBegin;
407   ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 
411 /**********************************************************************/
412 
413 /*@
414   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
415 
416   Input Parameters:
417 + pc - the preconditioner
418 - n -  cache size {0,1,2,3,4,5} (default 5)
419 
420   Notes:
421     SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
423                  5. This parameter is irrelevant in the serial
424                  version.
425 
426   Level: intermediate
427 
428 .seealso: PCSPAI, PCSetType()
429 @*/
430 PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
431 {
432   PetscErrorCode ierr;
433 
434   PetscFunctionBegin;
435   ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 /**********************************************************************/
440 
441 /*@
442   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
443 
444   Input Parameters:
445 + pc - the preconditioner
446 - n - level (default 1)
447 
448   Notes:
449     print parameters, timings and matrix statistics
450 
451   Level: intermediate
452 
453 .seealso: PCSPAI, PCSetType()
454 @*/
455 PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
456 {
457   PetscErrorCode ierr;
458 
459   PetscFunctionBegin;
460   ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 /**********************************************************************/
465 
466 /*@
467   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
468 
469   Input Parameters:
470 + pc - the preconditioner
471 - n - 0 or 1
472 
473   Notes:
474     If A has a symmetric nonzero pattern use -sp 1 to
475                  improve performance by eliminating some communication
476                  in the parallel version. Even if A does not have a
477                  symmetric nonzero pattern -sp 1 may well lead to good
478                  results, but the code will not follow the published
479                  SPAI algorithm exactly.
480 
481 
482   Level: intermediate
483 
484 .seealso: PCSPAI, PCSetType()
485 @*/
486 PetscErrorCode  PCSPAISetSp(PC pc,int sp)
487 {
488   PetscErrorCode ierr;
489 
490   PetscFunctionBegin;
491   ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 /**********************************************************************/
496 
497 /**********************************************************************/
498 
499 static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc)
500 {
501   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
502   PetscErrorCode ierr;
503   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
504   double         epsilon1;
505   PetscBool      flg;
506 
507   PetscFunctionBegin;
508   ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr);
509   ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
510   if (flg) {
511     ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
512   }
513   ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
514   if (flg) {
515     ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
516   }
517   /* added 1/7/99 g.h. */
518   ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
519   if (flg) {
520     ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
521   }
522   ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
523   if (flg) {
524     ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
525   }
526   ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
527   if (flg) {
528     ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
529   }
530   ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
531   if (flg) {
532     ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
533   }
534   ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
535   if (flg) {
536     ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
537   }
538   ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
539   if (flg) {
540     ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
541   }
542   ierr = PetscOptionsTail();CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 /**********************************************************************/
547 
548 /*MC
549    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
550      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
551 
   Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - set verbosity level
561 
562    Notes:
563     This only works with AIJ matrices.
564 
565    Level: beginner
566 
567 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
568     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
569     PCSPAISetVerbose(), PCSPAISetSp()
570 M*/
571 
572 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
573 {
574   PC_SPAI        *ispai;
575   PetscErrorCode ierr;
576 
577   PetscFunctionBegin;
578   ierr     = PetscNewLog(pc,&ispai);CHKERRQ(ierr);
579   pc->data = ispai;
580 
581   pc->ops->destroy         = PCDestroy_SPAI;
582   pc->ops->apply           = PCApply_SPAI;
583   pc->ops->matapply        = PCMatApply_SPAI;
584   pc->ops->applyrichardson = 0;
585   pc->ops->setup           = PCSetUp_SPAI;
586   pc->ops->view            = PCView_SPAI;
587   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
588 
589   ispai->epsilon    = .4;
590   ispai->nbsteps    = 5;
591   ispai->max        = 5000;
592   ispai->maxnew     = 5;
593   ispai->block_size = 1;
594   ispai->cache_size = 5;
595   ispai->verbose    = 0;
596 
597   ispai->sp = 1;
598   ierr      = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr);
599 
600   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
601   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
602   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr);
603   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
604   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
605   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
606   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
607   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 /**********************************************************************/
612 
613 /*
614    Converts from a PETSc matrix to an SPAI matrix
615 */
616 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
617 {
618   matrix                  *M;
619   int                     i,j,col;
620   int                     row_indx;
621   int                     len,pe,local_indx,start_indx;
622   int                     *mapping;
623   PetscErrorCode          ierr;
624   const int               *cols;
625   const double            *vals;
626   int                     n,mnl,nnl,nz,rstart,rend;
627   PetscMPIInt             size,rank;
628   struct compressed_lines *rows;
629 
630   PetscFunctionBegin;
631   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
632   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
633   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
634   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
635 
636   /*
637     not sure why a barrier is required. commenting out
638   ierr = MPI_Barrier(comm);CHKERRMPI(ierr);
639   */
640 
641   M = new_matrix((SPAI_Comm)comm);
642 
643   M->n              = n;
644   M->bs             = 1;
645   M->max_block_size = 1;
646 
647   M->mnls          = (int*)malloc(sizeof(int)*size);
648   M->start_indices = (int*)malloc(sizeof(int)*size);
649   M->pe            = (int*)malloc(sizeof(int)*n);
650   M->block_sizes   = (int*)malloc(sizeof(int)*n);
651   for (i=0; i<n; i++) M->block_sizes[i] = 1;
652 
653   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRMPI(ierr);
654 
655   M->start_indices[0] = 0;
656   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
657 
658   M->mnl            = M->mnls[M->myid];
659   M->my_start_index = M->start_indices[M->myid];
660 
661   for (i=0; i<size; i++) {
662     start_indx = M->start_indices[i];
663     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
664   }
665 
666   if (AT) {
667     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
668   } else {
669     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
670   }
671 
672   rows = M->lines;
673 
674   /* Determine the mapping from global indices to pointers */
675   ierr       = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr);
676   pe         = 0;
677   local_indx = 0;
678   for (i=0; i<M->n; i++) {
679     if (local_indx >= M->mnls[pe]) {
680       pe++;
681       local_indx = 0;
682     }
683     mapping[i] = local_indx + M->start_indices[pe];
684     local_indx++;
685   }
686 
687   /*********************************************************/
688   /************** Set up the row structure *****************/
689   /*********************************************************/
690 
691   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
692   for (i=rstart; i<rend; i++) {
693     row_indx = i - rstart;
694     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
695     /* allocate buffers */
696     rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int));
697     rows->A[row_indx]    = (double*)malloc(nz*sizeof(double));
698     /* copy the matrix */
699     for (j=0; j<nz; j++) {
700       col = cols[j];
701       len = rows->len[row_indx]++;
702 
703       rows->ptrs[row_indx][len] = mapping[col];
704       rows->A[row_indx][len]    = vals[j];
705     }
706     rows->slen[row_indx] = rows->len[row_indx];
707 
708     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
709   }
710 
711 
712   /************************************************************/
713   /************** Set up the column structure *****************/
714   /*********************************************************/
715 
716   if (AT) {
717 
718     for (i=rstart; i<rend; i++) {
719       row_indx = i - rstart;
720       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
721       /* allocate buffers */
722       rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int));
723       /* copy the matrix (i.e., the structure) */
724       for (j=0; j<nz; j++) {
725         col = cols[j];
726         len = rows->rlen[row_indx]++;
727 
728         rows->rptrs[row_indx][len] = mapping[col];
729       }
730       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
731     }
732   }
733 
734   ierr = PetscFree(mapping);CHKERRQ(ierr);
735 
736   order_pointers(M);
737   M->maxnz = calc_maxnz(M);
738   *B       = M;
739   PetscFunctionReturn(0);
740 }
741 
742 /**********************************************************************/
743 
744 /*
745    Converts from an SPAI matrix B  to a PETSc matrix PB.
746    This assumes that the SPAI matrix B is stored in
747    COMPRESSED-ROW format.
748 */
749 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
750 {
751   PetscMPIInt    size,rank;
752   PetscErrorCode ierr;
753   int            m,n,M,N;
754   int            d_nz,o_nz;
755   int            *d_nnz,*o_nnz;
756   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
757   PetscScalar    val;
758 
759   PetscFunctionBegin;
760   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
761   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
762 
763   m    = n = B->mnls[rank];
764   d_nz = o_nz = 0;
765 
766   /* Determine preallocation for MatCreateMPIAIJ */
767   ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr);
768   ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr);
769   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
770   first_diag_col = B->start_indices[rank];
771   last_diag_col  = first_diag_col + B->mnls[rank];
772   for (i=0; i<B->mnls[rank]; i++) {
773     for (k=0; k<B->lines->len[i]; k++) {
774       global_col = B->lines->ptrs[i][k];
775       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
776       else o_nnz[i]++;
777     }
778   }
779 
780   M = N = B->n;
781   /* Here we only know how to create AIJ format */
782   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
783   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
784   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
785   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
786   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
787 
788   for (i=0; i<B->mnls[rank]; i++) {
789     global_row = B->start_indices[rank]+i;
790     for (k=0; k<B->lines->len[i]; k++) {
791       global_col = B->lines->ptrs[i][k];
792 
793       val  = B->lines->A[i][k];
794       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
795     }
796   }
797 
798   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
799   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
800 
801   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
802   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
803   PetscFunctionReturn(0);
804 }
805 
806 /**********************************************************************/
807 
808 /*
809    Converts from an SPAI vector v  to a PETSc vec Pv.
810 */
811 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
812 {
813   PetscErrorCode ierr;
814   PetscMPIInt    size,rank;
815   int            m,M,i,*mnls,*start_indices,*global_indices;
816 
817   PetscFunctionBegin;
818   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
819   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
820 
821   m = v->mnl;
822   M = v->n;
823 
824 
825   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
826 
827   ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr);
828   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRMPI(ierr);
829 
830   ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr);
831 
832   start_indices[0] = 0;
833   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
834 
835   ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr);
836   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
837 
838   ierr = PetscFree(mnls);CHKERRQ(ierr);
839   ierr = PetscFree(start_indices);CHKERRQ(ierr);
840 
841   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
842   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
843   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
844 
845   ierr = PetscFree(global_indices);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848