xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 01a79839fc82a7dabb7a87cd2a8bb532c6bfa88d)
1 #define PETSCKSP_DLL
2 
3 /*
4    3/99 Modified by Stephen Barnard to support SPAI version 3.0
5 */
6 
7 /*
8       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9    Code written by Stephen Barnard.
10 
11       Note: there is some BAD memory bleeding below!
12 
13       This code needs work
14 
15    1) get rid of all memory bleeding
16    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
17       rather than having the sp flag for PC_SPAI
18    3) fix to set the block size based on the matrix block size
19 
20 */
21 
22 #include "private/pcimpl.h"        /*I "petscpc.h" I*/
23 #include "petscspai.h"
24 
25 /*
26     These are the SPAI include files
27 */
28 EXTERN_C_BEGIN
29 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
30 #include "spai.h"
31 #include "matrix.h"
32 EXTERN_C_END
33 
34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
37 extern PetscErrorCode MM_to_PETSC(char *,char *,char *);
38 
/*
   Private context for the SPAI preconditioner.  The numeric parameters
   mirror the arguments of SPAI's bspai() routine; they are set from the
   options database in PCSetFromOptions_SPAI() and consumed in
   PCSetUp_SPAI().
*/
typedef struct {

  matrix   *B;              /* matrix in SPAI format */
  matrix   *BT;             /* transpose of matrix in SPAI format */
  matrix   *M;              /* the approximate inverse in SPAI format */

  Mat      PM;              /* the approximate inverse PETSc format */

  double   epsilon;         /* tolerance */
  int      nbsteps;         /* max number of "improvement" steps per line */
  int      max;             /* max dimensions of is_I, q, etc. */
  int      maxnew;          /* max number of new entries per step */
  int      block_size;      /* constant block size */
  int      cache_size;      /* one of (1,2,3,4,5,6) indicating size of cache */
  int      verbose;         /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;       /* communicator to be used with spai */
} PC_SPAI;
58 
59 /**********************************************************************/
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "PCSetUp_SPAI"
63 static PetscErrorCode PCSetUp_SPAI(PC pc)
64 {
65   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
66   PetscErrorCode ierr;
67   Mat            AT;
68 
69   PetscFunctionBegin;
70 
71   init_SPAI();
72 
73   if (ispai->sp) {
74     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
75   } else {
76     /* Use the transpose to get the column nonzero structure. */
77     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
78     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
79     ierr = MatDestroy(AT);CHKERRQ(ierr);
80   }
81 
82   /* Destroy the transpose */
83   /* Don't know how to do it. PETSc developers? */
84 
85   /* construct SPAI preconditioner */
86   /* FILE *messages */     /* file for warning messages */
87   /* double epsilon */     /* tolerance */
88   /* int nbsteps */        /* max number of "improvement" steps per line */
89   /* int max */            /* max dimensions of is_I, q, etc. */
90   /* int maxnew */         /* max number of new entries per step */
91   /* int block_size */     /* block_size == 1 specifies scalar elments
92                               block_size == n specifies nxn constant-block elements
93                               block_size == 0 specifies variable-block elements */
94   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
95                            /* cache_size == 0 indicates no caching */
96   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
97                               verbose == 1 prints timing and matrix statistics */
98 
99   ierr = bspai(ispai->B,&ispai->M,
100 		   stdout,
101 		   ispai->epsilon,
102 		   ispai->nbsteps,
103 		   ispai->max,
104 		   ispai->maxnew,
105 		   ispai->block_size,
106 		   ispai->cache_size,
107 	       ispai->verbose);CHKERRQ(ierr);
108 
109   ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
110 
111   /* free the SPAI matrices */
112   sp_free_matrix(ispai->B);
113   sp_free_matrix(ispai->M);
114 
115   PetscFunctionReturn(0);
116 }
117 
118 /**********************************************************************/
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "PCApply_SPAI"
122 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
123 {
124   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
125   PetscErrorCode ierr;
126 
127   PetscFunctionBegin;
128   /* Now using PETSc's multiply */
129   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 /**********************************************************************/
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "PCDestroy_SPAI"
137 static PetscErrorCode PCDestroy_SPAI(PC pc)
138 {
139   PetscErrorCode ierr;
140   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
141 
142   PetscFunctionBegin;
143   if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);}
144   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
145   ierr = PetscFree(ispai);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 /**********************************************************************/
150 
151 #undef __FUNCT__
152 #define __FUNCT__ "PCView_SPAI"
153 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
154 {
155   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
156   PetscErrorCode ierr;
157   PetscBool      iascii;
158 
159   PetscFunctionBegin;
160   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
161   if (iascii) {
162     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
169     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
170     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 EXTERN_C_BEGIN
176 #undef __FUNCT__
177 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
178 PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
179 {
180   PC_SPAI *ispai = (PC_SPAI*)pc->data;
181   PetscFunctionBegin;
182   ispai->epsilon = epsilon1;
183   PetscFunctionReturn(0);
184 }
185 EXTERN_C_END
186 
187 /**********************************************************************/
188 
189 EXTERN_C_BEGIN
190 #undef __FUNCT__
191 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
192 PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
193 {
194   PC_SPAI *ispai = (PC_SPAI*)pc->data;
195   PetscFunctionBegin;
196   ispai->nbsteps = nbsteps1;
197   PetscFunctionReturn(0);
198 }
199 EXTERN_C_END
200 
201 /**********************************************************************/
202 
203 /* added 1/7/99 g.h. */
204 EXTERN_C_BEGIN
205 #undef __FUNCT__
206 #define __FUNCT__ "PCSPAISetMax_SPAI"
207 PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
208 {
209   PC_SPAI *ispai = (PC_SPAI*)pc->data;
210   PetscFunctionBegin;
211   ispai->max = max1;
212   PetscFunctionReturn(0);
213 }
214 EXTERN_C_END
215 
216 /**********************************************************************/
217 
218 EXTERN_C_BEGIN
219 #undef __FUNCT__
220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
221 PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
222 {
223   PC_SPAI *ispai = (PC_SPAI*)pc->data;
224   PetscFunctionBegin;
225   ispai->maxnew = maxnew1;
226   PetscFunctionReturn(0);
227 }
228 EXTERN_C_END
229 
230 /**********************************************************************/
231 
232 EXTERN_C_BEGIN
233 #undef __FUNCT__
234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
235 PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
236 {
237   PC_SPAI *ispai = (PC_SPAI*)pc->data;
238   PetscFunctionBegin;
239   ispai->block_size = block_size1;
240   PetscFunctionReturn(0);
241 }
242 EXTERN_C_END
243 
244 /**********************************************************************/
245 
246 EXTERN_C_BEGIN
247 #undef __FUNCT__
248 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
249 PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
250 {
251   PC_SPAI *ispai = (PC_SPAI*)pc->data;
252   PetscFunctionBegin;
253   ispai->cache_size = cache_size;
254   PetscFunctionReturn(0);
255 }
256 EXTERN_C_END
257 
258 /**********************************************************************/
259 
260 EXTERN_C_BEGIN
261 #undef __FUNCT__
262 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
263 PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
264 {
265   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
266   PetscFunctionBegin;
267   ispai->verbose = verbose;
268   PetscFunctionReturn(0);
269 }
270 EXTERN_C_END
271 
272 /**********************************************************************/
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "PCSPAISetSp_SPAI"
277 PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
278 {
279   PC_SPAI *ispai = (PC_SPAI*)pc->data;
280   PetscFunctionBegin;
281   ispai->sp = sp;
282   PetscFunctionReturn(0);
283 }
284 EXTERN_C_END
285 
286 /* -------------------------------------------------------------------*/
287 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- eps - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
  @*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
316 
317 /**********************************************************************/
318 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after ns steps, SPAI simply
                 uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
347 
348 /**********************************************************************/
349 
350 /* added 1/7/99 g.h. */
351 #undef __FUNCT__
352 #define __FUNCT__ "PCSPAISetMax"
353 /*@
354   PCSPAISetMax - set the size of various working buffers in
355         the SPAI preconditioner
356 
357   Input Parameters:
358 + pc - the preconditioner
359 - n - size (default is 5000)
360 
361   Level: intermediate
362 
363 .seealso: PCSPAI, PCSetType()
364 @*/
365 PetscErrorCode  PCSPAISetMax(PC pc,int max1)
366 {
367   PetscErrorCode ierr;
368   PetscFunctionBegin;
369   ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
370   PetscFunctionReturn(0);
371 }
372 
373 /**********************************************************************/
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCSPAISetMaxNew"
377 /*@
378   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
379    in SPAI preconditioner
380 
381   Input Parameters:
382 + pc - the preconditioner
383 - n - maximum number (default 5)
384 
385   Level: intermediate
386 
387 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
388 @*/
389 PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
390 {
391   PetscErrorCode ierr;
392   PetscFunctionBegin;
393   ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /**********************************************************************/
398 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - block size (default 1)

  Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
437 
438 /**********************************************************************/
439 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
465 
466 /**********************************************************************/
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "PCSPAISetVerbose"
470 /*@
471   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
472 
473   Input Parameters:
474 + pc - the preconditioner
475 - n - level (default 1)
476 
477   Notes: print parameters, timings and matrix statistics
478 
479   Level: intermediate
480 
481 .seealso: PCSPAI, PCSetType()
482 @*/
483 PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
484 {
485   PetscErrorCode ierr;
486   PetscFunctionBegin;
487   ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 /**********************************************************************/
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "PCSPAISetSp"
495 /*@
496   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
497 
498   Input Parameters:
499 + pc - the preconditioner
500 - n - 0 or 1
501 
502   Notes: If A has a symmetric nonzero pattern use -sp 1 to
503                  improve performance by eliminating some communication
504                  in the parallel version. Even if A does not have a
505                  symmetric nonzero pattern -sp 1 may well lead to good
506                  results, but the code will not follow the published
507                  SPAI algorithm exactly.
508 
509 
510   Level: intermediate
511 
512 .seealso: PCSPAI, PCSetType()
513 @*/
514 PetscErrorCode  PCSPAISetSp(PC pc,int sp)
515 {
516   PetscErrorCode ierr;
517   PetscFunctionBegin;
518   ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 /**********************************************************************/
523 
524 /**********************************************************************/
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "PCSetFromOptions_SPAI"
528 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
529 {
530   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
531   PetscErrorCode ierr;
532   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
533   double         epsilon1;
534   PetscBool      flg;
535 
536   PetscFunctionBegin;
537   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
538     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
539     if (flg) {
540       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
541     }
542     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
543     if (flg) {
544       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
545     }
546     /* added 1/7/99 g.h. */
547     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
548     if (flg) {
549       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
550     }
551     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
552     if (flg) {
553       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
554     }
555     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
556     if (flg) {
557       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
558     }
559     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
560     if (flg) {
561       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
562     }
563     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
564     if (flg) {
565       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
566     }
567     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
568     if (flg) {
569       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
570     }
571   ierr = PetscOptionsTail();CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 /**********************************************************************/
576 
/*MC
   PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
     as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)

   Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - verbose output

   Notes: This only works with AIJ matrices.

   Level: beginner

   Concepts: approximate inverse

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
    PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
    PCSPAISetVerbose(), PCSPAISetSp()
M*/
601 
602 EXTERN_C_BEGIN
603 #undef __FUNCT__
604 #define __FUNCT__ "PCCreate_SPAI"
605 PetscErrorCode  PCCreate_SPAI(PC pc)
606 {
607   PC_SPAI        *ispai;
608   PetscErrorCode ierr;
609 
610   PetscFunctionBegin;
611   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
612   pc->data           = ispai;
613 
614   pc->ops->destroy         = PCDestroy_SPAI;
615   pc->ops->apply           = PCApply_SPAI;
616   pc->ops->applyrichardson = 0;
617   pc->ops->setup           = PCSetUp_SPAI;
618   pc->ops->view            = PCView_SPAI;
619   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
620 
621   ispai->epsilon    = .4;
622   ispai->nbsteps    = 5;
623   ispai->max        = 5000;
624   ispai->maxnew     = 5;
625   ispai->block_size = 1;
626   ispai->cache_size = 5;
627   ispai->verbose    = 0;
628 
629   ispai->sp         = 1;
630   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
631 
632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
633                     "PCSPAISetEpsilon_SPAI",
634                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
636                     "PCSPAISetNBSteps_SPAI",
637                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
639                     "PCSPAISetMax_SPAI",
640                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
641   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
642                     "PCSPAISetMaxNew_SPAI",
643                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
644   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
645                     "PCSPAISetBlockSize_SPAI",
646                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
647   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
648                     "PCSPAISetCacheSize_SPAI",
649                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
650   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
651                     "PCSPAISetVerbose_SPAI",
652                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
654                     "PCSPAISetSp_SPAI",
655                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
656 
657   PetscFunctionReturn(0);
658 }
659 EXTERN_C_END
660 
661 /**********************************************************************/
662 
663 /*
664    Converts from a PETSc matrix to an SPAI matrix
665 */
666 #undef __FUNCT__
667 #define __FUNCT__ "ConvertMatToMatrix"
668 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
669 {
670   matrix                  *M;
671   int                     i,j,col;
672   int                     row_indx;
673   int                     len,pe,local_indx,start_indx;
674   int                     *mapping;
675   PetscErrorCode          ierr;
676   const int               *cols;
677   const double            *vals;
678   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
679   PetscMPIInt             size,rank;
680   struct compressed_lines *rows;
681 
682   PetscFunctionBegin;
683 
684   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
685   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
686   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
687   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
688 
689   /*
690     not sure why a barrier is required. commenting out
691   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
692   */
693 
694   M = new_matrix((SPAI_Comm)comm);
695 
696   M->n = n;
697   M->bs = 1;
698   M->max_block_size = 1;
699 
700   M->mnls          = (int*)malloc(sizeof(int)*size);
701   M->start_indices = (int*)malloc(sizeof(int)*size);
702   M->pe            = (int*)malloc(sizeof(int)*n);
703   M->block_sizes   = (int*)malloc(sizeof(int)*n);
704   for (i=0; i<n; i++) M->block_sizes[i] = 1;
705 
706   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
707 
708   M->start_indices[0] = 0;
709   for (i=1; i<size; i++) {
710     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
711   }
712 
713   M->mnl = M->mnls[M->myid];
714   M->my_start_index = M->start_indices[M->myid];
715 
716   for (i=0; i<size; i++) {
717     start_indx = M->start_indices[i];
718     for (j=0; j<M->mnls[i]; j++)
719       M->pe[start_indx+j] = i;
720   }
721 
722   if (AT) {
723     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
724   } else {
725     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
726   }
727 
728   rows = M->lines;
729 
730   /* Determine the mapping from global indices to pointers */
731   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
732   pe         = 0;
733   local_indx = 0;
734   for (i=0; i<M->n; i++) {
735     if (local_indx >= M->mnls[pe]) {
736       pe++;
737       local_indx = 0;
738     }
739     mapping[i] = local_indx + M->start_indices[pe];
740     local_indx++;
741   }
742 
743 
744   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
745 
746   /*********************************************************/
747   /************** Set up the row structure *****************/
748   /*********************************************************/
749 
750   /* count number of nonzeros in every row */
751   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
752   for (i=rstart; i<rend; i++) {
753     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
754     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
755   }
756 
757   /* allocate buffers */
758   len = 0;
759   for (i=0; i<mnl; i++) {
760     if (len < num_ptr[i]) len = num_ptr[i];
761   }
762 
763   for (i=rstart; i<rend; i++) {
764     row_indx             = i-rstart;
765     len                  = num_ptr[row_indx];
766     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
767     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
768   }
769 
770   /* copy the matrix */
771   for (i=rstart; i<rend; i++) {
772     row_indx = i - rstart;
773     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
774     for (j=0; j<nz; j++) {
775       col = cols[j];
776       len = rows->len[row_indx]++;
777       rows->ptrs[row_indx][len] = mapping[col];
778       rows->A[row_indx][len]    = vals[j];
779     }
780     rows->slen[row_indx] = rows->len[row_indx];
781     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
782   }
783 
784 
785   /************************************************************/
786   /************** Set up the column structure *****************/
787   /*********************************************************/
788 
789   if (AT) {
790 
791     /* count number of nonzeros in every column */
792     for (i=rstart; i<rend; i++) {
793       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
794       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
795     }
796 
797     /* allocate buffers */
798     len = 0;
799     for (i=0; i<mnl; i++) {
800       if (len < num_ptr[i]) len = num_ptr[i];
801     }
802 
803     for (i=rstart; i<rend; i++) {
804       row_indx = i-rstart;
805       len      = num_ptr[row_indx];
806       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
807     }
808 
809     /* copy the matrix (i.e., the structure) */
810     for (i=rstart; i<rend; i++) {
811       row_indx = i - rstart;
812       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
813       for (j=0; j<nz; j++) {
814 	col = cols[j];
815 	len = rows->rlen[row_indx]++;
816 	rows->rptrs[row_indx][len] = mapping[col];
817       }
818       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
819     }
820   }
821 
822   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
823   ierr = PetscFree(mapping);CHKERRQ(ierr);
824 
825   order_pointers(M);
826   M->maxnz = calc_maxnz(M);
827 
828   *B = M;
829 
830   PetscFunctionReturn(0);
831 }
832 
833 /**********************************************************************/
834 
835 /*
836    Converts from an SPAI matrix B  to a PETSc matrix PB.
837    This assumes that the the SPAI matrix B is stored in
838    COMPRESSED-ROW format.
839 */
840 #undef __FUNCT__
841 #define __FUNCT__ "ConvertMatrixToMat"
842 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
843 {
844   PetscMPIInt    size,rank;
845   PetscErrorCode ierr;
846   int            m,n,M,N;
847   int            d_nz,o_nz;
848   int            *d_nnz,*o_nnz;
849   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
850   PetscScalar    val;
851 
852   PetscFunctionBegin;
853   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
854   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
855 
856   m = n = B->mnls[rank];
857   d_nz = o_nz = 0;
858 
859   /* Determine preallocation for MatCreateMPIAIJ */
860   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
861   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
862   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
863   first_diag_col = B->start_indices[rank];
864   last_diag_col = first_diag_col + B->mnls[rank];
865   for (i=0; i<B->mnls[rank]; i++) {
866     for (k=0; k<B->lines->len[i]; k++) {
867       global_col = B->lines->ptrs[i][k];
868       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
869 	d_nnz[i]++;
870       else
871 	o_nnz[i]++;
872     }
873   }
874 
875   M = N = B->n;
876   /* Here we only know how to create AIJ format */
877   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
878   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
879   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
880   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
881   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
882 
883   for (i=0; i<B->mnls[rank]; i++) {
884     global_row = B->start_indices[rank]+i;
885     for (k=0; k<B->lines->len[i]; k++) {
886       global_col = B->lines->ptrs[i][k];
887       val = B->lines->A[i][k];
888       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
889     }
890   }
891 
892   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
893   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
894 
895   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897 
898   PetscFunctionReturn(0);
899 }
900 
901 /**********************************************************************/
902 
903 /*
904    Converts from an SPAI vector v  to a PETSc vec Pv.
905 */
906 #undef __FUNCT__
907 #define __FUNCT__ "ConvertVectorToVec"
908 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
909 {
910   PetscErrorCode ierr;
911   PetscMPIInt    size,rank;
912   int            m,M,i,*mnls,*start_indices,*global_indices;
913 
914   PetscFunctionBegin;
915   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
916   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
917 
918   m = v->mnl;
919   M = v->n;
920 
921 
922   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
923 
924   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
925   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
926 
927   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
928   start_indices[0] = 0;
929   for (i=1; i<size; i++)
930     start_indices[i] = start_indices[i-1] +mnls[i-1];
931 
932   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
933   for (i=0; i<v->mnl; i++)
934     global_indices[i] = start_indices[rank] + i;
935 
936   ierr = PetscFree(mnls);CHKERRQ(ierr);
937   ierr = PetscFree(start_indices);CHKERRQ(ierr);
938 
939   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
940   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
941   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
942 
943   ierr = PetscFree(global_indices);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 
948 
949 
950