xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision cf1aed2ce99d23e50336629af3ca8cf096900abb)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include <../src/ksp/pc/impls/spai/petscspai.h>
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
37 
38 typedef struct {
39 
40   matrix *B;                /* matrix in SPAI format */
41   matrix *BT;               /* transpose of matrix in SPAI format */
42   matrix *M;                /* the approximate inverse in SPAI format */
43 
44   Mat PM;                   /* the approximate inverse PETSc format */
45 
46   double epsilon;           /* tolerance */
47   int    nbsteps;           /* max number of "improvement" steps per line */
48   int    max;               /* max dimensions of is_I, q, etc. */
49   int    maxnew;            /* max number of new entries per step */
50   int    block_size;        /* constant block size */
51   int    cache_size;        /* one of (1,2,3,4,5,6) indicting size of cache */
52   int    verbose;           /* SPAI prints timing and statistics */
53 
54   int      sp;              /* symmetric nonzero pattern */
55   MPI_Comm comm_spai;     /* communicator to be used with spai */
56 } PC_SPAI;
57 
58 /**********************************************************************/
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "PCSetUp_SPAI"
62 static PetscErrorCode PCSetUp_SPAI(PC pc)
63 {
64   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
65   PetscErrorCode ierr;
66   Mat            AT;
67 
68   PetscFunctionBegin;
69   init_SPAI();
70 
71   if (ispai->sp) {
72     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
73   } else {
74     /* Use the transpose to get the column nonzero structure. */
75     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
76     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
77     ierr = MatDestroy(&AT);CHKERRQ(ierr);
78   }
79 
80   /* Destroy the transpose */
81   /* Don't know how to do it. PETSc developers? */
82 
83   /* construct SPAI preconditioner */
84   /* FILE *messages */     /* file for warning messages */
85   /* double epsilon */     /* tolerance */
86   /* int nbsteps */        /* max number of "improvement" steps per line */
87   /* int max */            /* max dimensions of is_I, q, etc. */
88   /* int maxnew */         /* max number of new entries per step */
89   /* int block_size */     /* block_size == 1 specifies scalar elments
90                               block_size == n specifies nxn constant-block elements
91                               block_size == 0 specifies variable-block elements */
92   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
93   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
94                               verbose == 1 prints timing and matrix statistics */
95 
96   ierr = bspai(ispai->B,&ispai->M,
97                stdout,
98                ispai->epsilon,
99                ispai->nbsteps,
100                ispai->max,
101                ispai->maxnew,
102                ispai->block_size,
103                ispai->cache_size,
104                ispai->verbose);CHKERRQ(ierr);
105 
106   ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);
107 
108   /* free the SPAI matrices */
109   sp_free_matrix(ispai->B);
110   sp_free_matrix(ispai->M);
111   PetscFunctionReturn(0);
112 }
113 
114 /**********************************************************************/
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCApply_SPAI"
118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
119 {
120   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   /* Now using PETSc's multiply */
125   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 /**********************************************************************/
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "PCDestroy_SPAI"
133 static PetscErrorCode PCDestroy_SPAI(PC pc)
134 {
135   PetscErrorCode ierr;
136   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
140   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
141   ierr = PetscFree(pc->data);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 /**********************************************************************/
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "PCView_SPAI"
149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
150 {
151   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
152   PetscErrorCode ierr;
153   PetscBool      iascii;
154 
155   PetscFunctionBegin;
156   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
157   if (iascii) {
158     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
159     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %g\n",   (double)ispai->epsilon);CHKERRQ(ierr);
160     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
173 static PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
174 {
175   PC_SPAI *ispai = (PC_SPAI*)pc->data;
176 
177   PetscFunctionBegin;
178   ispai->epsilon = epsilon1;
179   PetscFunctionReturn(0);
180 }
181 
182 /**********************************************************************/
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
186 static PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
187 {
188   PC_SPAI *ispai = (PC_SPAI*)pc->data;
189 
190   PetscFunctionBegin;
191   ispai->nbsteps = nbsteps1;
192   PetscFunctionReturn(0);
193 }
194 
195 /**********************************************************************/
196 
197 /* added 1/7/99 g.h. */
198 #undef __FUNCT__
199 #define __FUNCT__ "PCSPAISetMax_SPAI"
200 static PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
201 {
202   PC_SPAI *ispai = (PC_SPAI*)pc->data;
203 
204   PetscFunctionBegin;
205   ispai->max = max1;
206   PetscFunctionReturn(0);
207 }
208 
209 /**********************************************************************/
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
213 static PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
214 {
215   PC_SPAI *ispai = (PC_SPAI*)pc->data;
216 
217   PetscFunctionBegin;
218   ispai->maxnew = maxnew1;
219   PetscFunctionReturn(0);
220 }
221 
222 /**********************************************************************/
223 
224 #undef __FUNCT__
225 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
226 static PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
227 {
228   PC_SPAI *ispai = (PC_SPAI*)pc->data;
229 
230   PetscFunctionBegin;
231   ispai->block_size = block_size1;
232   PetscFunctionReturn(0);
233 }
234 
235 /**********************************************************************/
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
239 static PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
240 {
241   PC_SPAI *ispai = (PC_SPAI*)pc->data;
242 
243   PetscFunctionBegin;
244   ispai->cache_size = cache_size;
245   PetscFunctionReturn(0);
246 }
247 
248 /**********************************************************************/
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
252 static PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
253 {
254   PC_SPAI *ispai = (PC_SPAI*)pc->data;
255 
256   PetscFunctionBegin;
257   ispai->verbose = verbose;
258   PetscFunctionReturn(0);
259 }
260 
261 /**********************************************************************/
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "PCSPAISetSp_SPAI"
265 static PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
266 {
267   PC_SPAI *ispai = (PC_SPAI*)pc->data;
268 
269   PetscFunctionBegin;
270   ispai->sp = sp;
271   PetscFunctionReturn(0);
272 }
273 
274 /* -------------------------------------------------------------------*/
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "PCSPAISetEpsilon"
278 /*@
279   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
280 
281   Input Parameters:
282 + pc - the preconditioner
283 - eps - epsilon (default .4)
284 
285   Notes:  Espilon must be between 0 and 1. It controls the
286                  quality of the approximation of M to the inverse of
287                  A. Higher values of epsilon lead to more work, more
288                  fill, and usually better preconditioners. In many
289                  cases the best choice of epsilon is the one that
290                  divides the total solution time equally between the
291                  preconditioner and the solver.
292 
293   Level: intermediate
294 
295 .seealso: PCSPAI, PCSetType()
296   @*/
297 PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
298 {
299   PetscErrorCode ierr;
300 
301   PetscFunctionBegin;
302   ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 /**********************************************************************/
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "PCSPAISetNBSteps"
310 /*@
311   PCSPAISetNBSteps - set maximum number of improvement steps per row in
312         the SPAI preconditioner
313 
314   Input Parameters:
315 + pc - the preconditioner
316 - n - number of steps (default 5)
317 
318   Notes:  SPAI constructs to approximation to every column of
319                  the exact inverse of A in a series of improvement
320                  steps. The quality of the approximation is determined
321                  by epsilon. If an approximation achieving an accuracy
322                  of epsilon is not obtained after ns steps, SPAI simply
323                  uses the best approximation constructed so far.
324 
325   Level: intermediate
326 
327 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
328 @*/
329 PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
330 {
331   PetscErrorCode ierr;
332 
333   PetscFunctionBegin;
334   ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 /**********************************************************************/
339 
340 /* added 1/7/99 g.h. */
341 #undef __FUNCT__
342 #define __FUNCT__ "PCSPAISetMax"
343 /*@
344   PCSPAISetMax - set the size of various working buffers in
345         the SPAI preconditioner
346 
347   Input Parameters:
348 + pc - the preconditioner
349 - n - size (default is 5000)
350 
351   Level: intermediate
352 
353 .seealso: PCSPAI, PCSetType()
354 @*/
355 PetscErrorCode  PCSPAISetMax(PC pc,int max1)
356 {
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 
364 /**********************************************************************/
365 
366 #undef __FUNCT__
367 #define __FUNCT__ "PCSPAISetMaxNew"
368 /*@
369   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
370    in SPAI preconditioner
371 
372   Input Parameters:
373 + pc - the preconditioner
374 - n - maximum number (default 5)
375 
376   Level: intermediate
377 
378 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
379 @*/
380 PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
381 {
382   PetscErrorCode ierr;
383 
384   PetscFunctionBegin;
385   ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 /**********************************************************************/
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "PCSPAISetBlockSize"
393 /*@
394   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
395 
396   Input Parameters:
397 + pc - the preconditioner
398 - n - block size (default 1)
399 
400   Notes: A block
401                  size of 1 treats A as a matrix of scalar elements. A
402                  block size of s > 1 treats A as a matrix of sxs
403                  blocks. A block size of 0 treats A as a matrix with
404                  variable sized blocks, which are determined by
405                  searching for dense square diagonal blocks in A.
406                  This can be very effective for finite-element
407                  matrices.
408 
409                  SPAI will convert A to block form, use a block
410                  version of the preconditioner algorithm, and then
411                  convert the result back to scalar form.
412 
413                  In many cases the a block-size parameter other than 1
414                  can lead to very significant improvement in
415                  performance.
416 
417 
418   Level: intermediate
419 
420 .seealso: PCSPAI, PCSetType()
421 @*/
422 PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
423 {
424   PetscErrorCode ierr;
425 
426   PetscFunctionBegin;
427   ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 /**********************************************************************/
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "PCSPAISetCacheSize"
435 /*@
436   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
437 
438   Input Parameters:
439 + pc - the preconditioner
440 - n -  cache size {0,1,2,3,4,5} (default 5)
441 
442   Notes:    SPAI uses a hash table to cache messages and avoid
443                  redundant communication. If suggest always using
444                  5. This parameter is irrelevant in the serial
445                  version.
446 
447   Level: intermediate
448 
449 .seealso: PCSPAI, PCSetType()
450 @*/
451 PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
452 {
453   PetscErrorCode ierr;
454 
455   PetscFunctionBegin;
456   ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 /**********************************************************************/
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "PCSPAISetVerbose"
464 /*@
465   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
466 
467   Input Parameters:
468 + pc - the preconditioner
469 - n - level (default 1)
470 
471   Notes: print parameters, timings and matrix statistics
472 
473   Level: intermediate
474 
475 .seealso: PCSPAI, PCSetType()
476 @*/
477 PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
478 {
479   PetscErrorCode ierr;
480 
481   PetscFunctionBegin;
482   ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 /**********************************************************************/
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "PCSPAISetSp"
490 /*@
491   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
492 
493   Input Parameters:
494 + pc - the preconditioner
495 - n - 0 or 1
496 
497   Notes: If A has a symmetric nonzero pattern use -sp 1 to
498                  improve performance by eliminating some communication
499                  in the parallel version. Even if A does not have a
500                  symmetric nonzero pattern -sp 1 may well lead to good
501                  results, but the code will not follow the published
502                  SPAI algorithm exactly.
503 
504 
505   Level: intermediate
506 
507 .seealso: PCSPAI, PCSetType()
508 @*/
509 PetscErrorCode  PCSPAISetSp(PC pc,int sp)
510 {
511   PetscErrorCode ierr;
512 
513   PetscFunctionBegin;
514   ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 /**********************************************************************/
519 
520 /**********************************************************************/
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "PCSetFromOptions_SPAI"
524 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
525 {
526   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
527   PetscErrorCode ierr;
528   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
529   double         epsilon1;
530   PetscBool      flg;
531 
532   PetscFunctionBegin;
533   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
534   ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
535   if (flg) {
536     ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
537   }
538   ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
539   if (flg) {
540     ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
541   }
542   /* added 1/7/99 g.h. */
543   ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
544   if (flg) {
545     ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
546   }
547   ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
548   if (flg) {
549     ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
550   }
551   ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
552   if (flg) {
553     ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
554   }
555   ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
556   if (flg) {
557     ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
558   }
559   ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
560   if (flg) {
561     ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
562   }
563   ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
564   if (flg) {
565     ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
566   }
567   ierr = PetscOptionsTail();CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 /**********************************************************************/
572 
573 /*MC
574    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
575      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
576 
577    Options Database Keys:
578 +  -pc_spai_epsilon <eps> - set tolerance
579 .  -pc_spai_nbstep <n> - set nbsteps
580 .  -pc_spai_max <m> - set max
581 .  -pc_spai_max_new <m> - set maxnew
582 .  -pc_spai_block_size <n> - set block size
583 .  -pc_spai_cache_size <n> - set cache size
584 .  -pc_spai_sp <m> - set sp
585 -  -pc_spai_set_verbose <true,false> - verbose output
586 
587    Notes: This only works with AIJ matrices.
588 
589    Level: beginner
590 
591    Concepts: approximate inverse
592 
593 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
594     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
595     PCSPAISetVerbose(), PCSPAISetSp()
596 M*/
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "PCCreate_SPAI"
600 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
601 {
602   PC_SPAI        *ispai;
603   PetscErrorCode ierr;
604 
605   PetscFunctionBegin;
606   ierr     = PetscNewLog(pc,&ispai);CHKERRQ(ierr);
607   pc->data = ispai;
608 
609   pc->ops->destroy         = PCDestroy_SPAI;
610   pc->ops->apply           = PCApply_SPAI;
611   pc->ops->applyrichardson = 0;
612   pc->ops->setup           = PCSetUp_SPAI;
613   pc->ops->view            = PCView_SPAI;
614   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
615 
616   ispai->epsilon    = .4;
617   ispai->nbsteps    = 5;
618   ispai->max        = 5000;
619   ispai->maxnew     = 5;
620   ispai->block_size = 1;
621   ispai->cache_size = 5;
622   ispai->verbose    = 0;
623 
624   ispai->sp = 1;
625   ierr      = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr);
626 
627   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
628   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
629   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr);
630   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
631   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
632   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
633   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
634   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr);
635   PetscFunctionReturn(0);
636 }
637 
638 /**********************************************************************/
639 
640 /*
641    Converts from a PETSc matrix to an SPAI matrix
642 */
643 #undef __FUNCT__
644 #define __FUNCT__ "ConvertMatToMatrix"
645 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
646 {
647   matrix                  *M;
648   int                     i,j,col;
649   int                     row_indx;
650   int                     len,pe,local_indx,start_indx;
651   int                     *mapping;
652   PetscErrorCode          ierr;
653   const int               *cols;
654   const double            *vals;
655   int                     n,mnl,nnl,nz,rstart,rend;
656   PetscMPIInt             size,rank;
657   struct compressed_lines *rows;
658 
659   PetscFunctionBegin;
660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
661   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
662   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
663   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
664 
665   /*
666     not sure why a barrier is required. commenting out
667   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
668   */
669 
670   M = new_matrix((SPAI_Comm)comm);
671 
672   M->n              = n;
673   M->bs             = 1;
674   M->max_block_size = 1;
675 
676   M->mnls          = (int*)malloc(sizeof(int)*size);
677   M->start_indices = (int*)malloc(sizeof(int)*size);
678   M->pe            = (int*)malloc(sizeof(int)*n);
679   M->block_sizes   = (int*)malloc(sizeof(int)*n);
680   for (i=0; i<n; i++) M->block_sizes[i] = 1;
681 
682   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
683 
684   M->start_indices[0] = 0;
685   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
686 
687   M->mnl            = M->mnls[M->myid];
688   M->my_start_index = M->start_indices[M->myid];
689 
690   for (i=0; i<size; i++) {
691     start_indx = M->start_indices[i];
692     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
693   }
694 
695   if (AT) {
696     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
697   } else {
698     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
699   }
700 
701   rows = M->lines;
702 
703   /* Determine the mapping from global indices to pointers */
704   ierr       = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr);
705   pe         = 0;
706   local_indx = 0;
707   for (i=0; i<M->n; i++) {
708     if (local_indx >= M->mnls[pe]) {
709       pe++;
710       local_indx = 0;
711     }
712     mapping[i] = local_indx + M->start_indices[pe];
713     local_indx++;
714   }
715 
716   /*********************************************************/
717   /************** Set up the row structure *****************/
718   /*********************************************************/
719 
720   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
721   for (i=rstart; i<rend; i++) {
722     row_indx = i - rstart;
723     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
724     /* allocate buffers */
725     rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int));
726     rows->A[row_indx]    = (double*)malloc(nz*sizeof(double));
727     /* copy the matrix */
728     for (j=0; j<nz; j++) {
729       col = cols[j];
730       len = rows->len[row_indx]++;
731 
732       rows->ptrs[row_indx][len] = mapping[col];
733       rows->A[row_indx][len]    = vals[j];
734     }
735     rows->slen[row_indx] = rows->len[row_indx];
736 
737     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
738   }
739 
740 
741   /************************************************************/
742   /************** Set up the column structure *****************/
743   /*********************************************************/
744 
745   if (AT) {
746 
747     for (i=rstart; i<rend; i++) {
748       row_indx = i - rstart;
749       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
750       /* allocate buffers */
751       rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int));
752       /* copy the matrix (i.e., the structure) */
753       for (j=0; j<nz; j++) {
754         col = cols[j];
755         len = rows->rlen[row_indx]++;
756 
757         rows->rptrs[row_indx][len] = mapping[col];
758       }
759       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
760     }
761   }
762 
763   ierr = PetscFree(mapping);CHKERRQ(ierr);
764 
765   order_pointers(M);
766   M->maxnz = calc_maxnz(M);
767   *B       = M;
768   PetscFunctionReturn(0);
769 }
770 
771 /**********************************************************************/
772 
773 /*
774    Converts from an SPAI matrix B  to a PETSc matrix PB.
775    This assumes that the the SPAI matrix B is stored in
776    COMPRESSED-ROW format.
777 */
778 #undef __FUNCT__
779 #define __FUNCT__ "ConvertMatrixToMat"
780 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
781 {
782   PetscMPIInt    size,rank;
783   PetscErrorCode ierr;
784   int            m,n,M,N;
785   int            d_nz,o_nz;
786   int            *d_nnz,*o_nnz;
787   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
788   PetscScalar    val;
789 
790   PetscFunctionBegin;
791   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
792   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
793 
794   m    = n = B->mnls[rank];
795   d_nz = o_nz = 0;
796 
797   /* Determine preallocation for MatCreateMPIAIJ */
798   ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr);
799   ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr);
800   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
801   first_diag_col = B->start_indices[rank];
802   last_diag_col  = first_diag_col + B->mnls[rank];
803   for (i=0; i<B->mnls[rank]; i++) {
804     for (k=0; k<B->lines->len[i]; k++) {
805       global_col = B->lines->ptrs[i][k];
806       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
807       else o_nnz[i]++;
808     }
809   }
810 
811   M = N = B->n;
812   /* Here we only know how to create AIJ format */
813   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
814   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
815   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
816   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
817   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
818 
819   for (i=0; i<B->mnls[rank]; i++) {
820     global_row = B->start_indices[rank]+i;
821     for (k=0; k<B->lines->len[i]; k++) {
822       global_col = B->lines->ptrs[i][k];
823 
824       val  = B->lines->A[i][k];
825       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
826     }
827   }
828 
829   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
830   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
831 
832   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
833   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
834   PetscFunctionReturn(0);
835 }
836 
837 /**********************************************************************/
838 
839 /*
840    Converts from an SPAI vector v  to a PETSc vec Pv.
841 */
842 #undef __FUNCT__
843 #define __FUNCT__ "ConvertVectorToVec"
844 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
845 {
846   PetscErrorCode ierr;
847   PetscMPIInt    size,rank;
848   int            m,M,i,*mnls,*start_indices,*global_indices;
849 
850   PetscFunctionBegin;
851   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
852   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
853 
854   m = v->mnl;
855   M = v->n;
856 
857 
858   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
859 
860   ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr);
861   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
862 
863   ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr);
864 
865   start_indices[0] = 0;
866   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
867 
868   ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr);
869   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
870 
871   ierr = PetscFree(mnls);CHKERRQ(ierr);
872   ierr = PetscFree(start_indices);CHKERRQ(ierr);
873 
874   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
875   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
876   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
877 
878   ierr = PetscFree(global_indices);CHKERRQ(ierr);
879   PetscFunctionReturn(0);
880 }
881 
882 
883 
884 
885