xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char *,char *,char *);
37 
/*
   Private context for the SPAI preconditioner. Created in PCCreate_SPAI(),
   populated from the options database in PCSetFromOptions_SPAI(), and
   consumed in PCSetUp_SPAI() where the approximate inverse is built.
*/
typedef struct {

  matrix   *B;              /* matrix in SPAI format */
  matrix   *BT;             /* transpose of matrix in SPAI format (never referenced in this file -- TODO confirm it is still needed) */
  matrix   *M;              /* the approximate inverse in SPAI format */

  Mat      PM;              /* the approximate inverse PETSc format; applied in PCApply_SPAI() */

  double   epsilon;         /* tolerance */
  int      nbsteps;         /* max number of "improvement" steps per line */
  int      max;             /* max dimensions of is_I, q, etc. */
  int      maxnew;          /* max number of new entries per step */
  int      block_size;      /* constant block size */
  int      cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
  int      verbose;         /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;     /* duplicated communicator to be used with spai (freed in PCDestroy_SPAI) */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "PCSetUp_SPAI"
/*
   PCSetUp_SPAI - Builds the SPAI approximate inverse of pc->pmat.

   Converts the PETSc matrix to SPAI's internal format, runs the bspai()
   driver to compute the approximate inverse M, converts M back to a PETSc
   Mat (stored in ispai->PM for use by PCApply_SPAI), and frees the
   intermediate SPAI-format matrices.
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  Mat            AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix doubles as its own transpose */
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
  } else {
    /* Use the transpose to get the column nonzero structure. */
    ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
    /* the explicit transpose is only needed during conversion */
    ierr = MatDestroy(&AT);CHKERRQ(ierr);
  }

  /* construct SPAI preconditioner */
  /* FILE *messages */     /* file for warning messages */
  /* double epsilon */     /* tolerance */
  /* int nbsteps */        /* max number of "improvement" steps per line */
  /* int max */            /* max dimensions of is_I, q, etc. */
  /* int maxnew */         /* max number of new entries per step */
  /* int block_size */     /* block_size == 1 specifies scalar elments
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
                           /* cache_size == 0 indicates no caching */
  /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  ierr = bspai(ispai->B,&ispai->M,
               stdout,
               ispai->epsilon,
               ispai->nbsteps,
               ispai->max,
               ispai->maxnew,
               ispai->block_size,
               ispai->cache_size,
               ispai->verbose);CHKERRQ(ierr);

  ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);

  /* free the SPAI matrices; only the PETSc copy ispai->PM is kept */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);

  PetscFunctionReturn(0);
}
115 
116 /**********************************************************************/
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "PCApply_SPAI"
120 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
121 {
122   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   /* Now using PETSc's multiply */
127   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /**********************************************************************/
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "PCDestroy_SPAI"
135 static PetscErrorCode PCDestroy_SPAI(PC pc)
136 {
137   PetscErrorCode ierr;
138   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
139 
140   PetscFunctionBegin;
141   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
142   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
143   ierr = PetscFree(pc->data);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 /**********************************************************************/
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "PCView_SPAI"
151 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
152 {
153   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
154   PetscErrorCode ierr;
155   PetscBool      iascii;
156 
157   PetscFunctionBegin;
158   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
159   if (iascii) {
160     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 EXTERN_C_BEGIN
174 #undef __FUNCT__
175 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
176 PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
177 {
178   PC_SPAI *ispai = (PC_SPAI*)pc->data;
179   PetscFunctionBegin;
180   ispai->epsilon = epsilon1;
181   PetscFunctionReturn(0);
182 }
183 EXTERN_C_END
184 
185 /**********************************************************************/
186 
187 EXTERN_C_BEGIN
188 #undef __FUNCT__
189 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
190 PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
191 {
192   PC_SPAI *ispai = (PC_SPAI*)pc->data;
193   PetscFunctionBegin;
194   ispai->nbsteps = nbsteps1;
195   PetscFunctionReturn(0);
196 }
197 EXTERN_C_END
198 
199 /**********************************************************************/
200 
201 /* added 1/7/99 g.h. */
202 EXTERN_C_BEGIN
203 #undef __FUNCT__
204 #define __FUNCT__ "PCSPAISetMax_SPAI"
205 PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
206 {
207   PC_SPAI *ispai = (PC_SPAI*)pc->data;
208   PetscFunctionBegin;
209   ispai->max = max1;
210   PetscFunctionReturn(0);
211 }
212 EXTERN_C_END
213 
214 /**********************************************************************/
215 
216 EXTERN_C_BEGIN
217 #undef __FUNCT__
218 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
219 PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
220 {
221   PC_SPAI *ispai = (PC_SPAI*)pc->data;
222   PetscFunctionBegin;
223   ispai->maxnew = maxnew1;
224   PetscFunctionReturn(0);
225 }
226 EXTERN_C_END
227 
228 /**********************************************************************/
229 
230 EXTERN_C_BEGIN
231 #undef __FUNCT__
232 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
233 PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
234 {
235   PC_SPAI *ispai = (PC_SPAI*)pc->data;
236   PetscFunctionBegin;
237   ispai->block_size = block_size1;
238   PetscFunctionReturn(0);
239 }
240 EXTERN_C_END
241 
242 /**********************************************************************/
243 
244 EXTERN_C_BEGIN
245 #undef __FUNCT__
246 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
247 PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
248 {
249   PC_SPAI *ispai = (PC_SPAI*)pc->data;
250   PetscFunctionBegin;
251   ispai->cache_size = cache_size;
252   PetscFunctionReturn(0);
253 }
254 EXTERN_C_END
255 
256 /**********************************************************************/
257 
258 EXTERN_C_BEGIN
259 #undef __FUNCT__
260 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
261 PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
262 {
263   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
264   PetscFunctionBegin;
265   ispai->verbose = verbose;
266   PetscFunctionReturn(0);
267 }
268 EXTERN_C_END
269 
270 /**********************************************************************/
271 
272 EXTERN_C_BEGIN
273 #undef __FUNCT__
274 #define __FUNCT__ "PCSPAISetSp_SPAI"
275 PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
276 {
277   PC_SPAI *ispai = (PC_SPAI*)pc->data;
278   PetscFunctionBegin;
279   ispai->sp = sp;
280   PetscFunctionReturn(0);
281 }
282 EXTERN_C_END
283 
284 /* -------------------------------------------------------------------*/
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "PCSPAISetEpsilon"
288 /*@
289   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
290 
291   Input Parameters:
292 + pc - the preconditioner
293 - eps - epsilon (default .4)
294 
295   Notes:  Espilon must be between 0 and 1. It controls the
296                  quality of the approximation of M to the inverse of
297                  A. Higher values of epsilon lead to more work, more
298                  fill, and usually better preconditioners. In many
299                  cases the best choice of epsilon is the one that
300                  divides the total solution time equally between the
301                  preconditioner and the solver.
302 
303   Level: intermediate
304 
305 .seealso: PCSPAI, PCSetType()
306   @*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
314 
315 /**********************************************************************/
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "PCSPAISetNBSteps"
319 /*@
320   PCSPAISetNBSteps - set maximum number of improvement steps per row in
321         the SPAI preconditioner
322 
323   Input Parameters:
324 + pc - the preconditioner
325 - n - number of steps (default 5)
326 
327   Notes:  SPAI constructs to approximation to every column of
328                  the exact inverse of A in a series of improvement
329                  steps. The quality of the approximation is determined
330                  by epsilon. If an approximation achieving an accuracy
331                  of epsilon is not obtained after ns steps, SPAI simply
332                  uses the best approximation constructed so far.
333 
334   Level: intermediate
335 
336 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
337 @*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
345 
346 /**********************************************************************/
347 
348 /* added 1/7/99 g.h. */
349 #undef __FUNCT__
350 #define __FUNCT__ "PCSPAISetMax"
351 /*@
352   PCSPAISetMax - set the size of various working buffers in
353         the SPAI preconditioner
354 
355   Input Parameters:
356 + pc - the preconditioner
357 - n - size (default is 5000)
358 
359   Level: intermediate
360 
361 .seealso: PCSPAI, PCSetType()
362 @*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
370 
371 /**********************************************************************/
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "PCSPAISetMaxNew"
375 /*@
376   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
377    in SPAI preconditioner
378 
379   Input Parameters:
380 + pc - the preconditioner
381 - n - maximum number (default 5)
382 
383   Level: intermediate
384 
385 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
386 @*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
394 
395 /**********************************************************************/
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCSPAISetBlockSize"
399 /*@
400   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
401 
402   Input Parameters:
403 + pc - the preconditioner
404 - n - block size (default 1)
405 
406   Notes: A block
407                  size of 1 treats A as a matrix of scalar elements. A
408                  block size of s > 1 treats A as a matrix of sxs
409                  blocks. A block size of 0 treats A as a matrix with
410                  variable sized blocks, which are determined by
411                  searching for dense square diagonal blocks in A.
412                  This can be very effective for finite-element
413                  matrices.
414 
415                  SPAI will convert A to block form, use a block
416                  version of the preconditioner algorithm, and then
417                  convert the result back to scalar form.
418 
419                  In many cases the a block-size parameter other than 1
420                  can lead to very significant improvement in
421                  performance.
422 
423 
424   Level: intermediate
425 
426 .seealso: PCSPAI, PCSetType()
427 @*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
435 
436 /**********************************************************************/
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "PCSPAISetCacheSize"
440 /*@
441   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
442 
443   Input Parameters:
444 + pc - the preconditioner
445 - n -  cache size {0,1,2,3,4,5} (default 5)
446 
447   Notes:    SPAI uses a hash table to cache messages and avoid
448                  redundant communication. If suggest always using
449                  5. This parameter is irrelevant in the serial
450                  version.
451 
452   Level: intermediate
453 
454 .seealso: PCSPAI, PCSetType()
455 @*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
463 
464 /**********************************************************************/
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "PCSPAISetVerbose"
468 /*@
469   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
470 
471   Input Parameters:
472 + pc - the preconditioner
473 - n - level (default 1)
474 
475   Notes: print parameters, timings and matrix statistics
476 
477   Level: intermediate
478 
479 .seealso: PCSPAI, PCSetType()
480 @*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
488 
489 /**********************************************************************/
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "PCSPAISetSp"
493 /*@
494   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
495 
496   Input Parameters:
497 + pc - the preconditioner
498 - n - 0 or 1
499 
500   Notes: If A has a symmetric nonzero pattern use -sp 1 to
501                  improve performance by eliminating some communication
502                  in the parallel version. Even if A does not have a
503                  symmetric nonzero pattern -sp 1 may well lead to good
504                  results, but the code will not follow the published
505                  SPAI algorithm exactly.
506 
507 
508   Level: intermediate
509 
510 .seealso: PCSPAI, PCSetType()
511 @*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to the function composed on the PC; a no-op for non-SPAI PCs */
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
519 
520 /**********************************************************************/
521 
522 /**********************************************************************/
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "PCSetFromOptions_SPAI"
526 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
527 {
528   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
529   PetscErrorCode ierr;
530   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
531   double         epsilon1;
532   PetscBool      flg;
533 
534   PetscFunctionBegin;
535   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
536     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
537     if (flg) {
538       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
539     }
540     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
541     if (flg) {
542       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
543     }
544     /* added 1/7/99 g.h. */
545     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
546     if (flg) {
547       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
548     }
549     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
550     if (flg) {
551       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
552     }
553     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
554     if (flg) {
555       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
556     }
557     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
558     if (flg) {
559       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
560     }
561     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
562     if (flg) {
563       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
564     }
565     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
566     if (flg) {
567       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
568     }
569   ierr = PetscOptionsTail();CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 /**********************************************************************/
574 
575 /*MC
576    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
577      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
578 
579    Options Database Keys:
580 +  -pc_spai_epsilon <eps> - set tolerance
581 .  -pc_spai_nbstep <n> - set nbsteps
582 .  -pc_spai_max <m> - set max
583 .  -pc_spai_max_new <m> - set maxnew
584 .  -pc_spai_block_size <n> - set block size
585 .  -pc_spai_cache_size <n> - set cache size
586 .  -pc_spai_sp <m> - set sp
587 -  -pc_spai_set_verbose <true,false> - verbose output
588 
589    Notes: This only works with AIJ matrices.
590 
591    Level: beginner
592 
593    Concepts: approximate inverse
594 
595 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
596     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
597     PCSPAISetVerbose(), PCSPAISetSp()
598 M*/
599 
600 EXTERN_C_BEGIN
601 #undef __FUNCT__
602 #define __FUNCT__ "PCCreate_SPAI"
603 PetscErrorCode  PCCreate_SPAI(PC pc)
604 {
605   PC_SPAI        *ispai;
606   PetscErrorCode ierr;
607 
608   PetscFunctionBegin;
609   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
610   pc->data           = ispai;
611 
612   pc->ops->destroy         = PCDestroy_SPAI;
613   pc->ops->apply           = PCApply_SPAI;
614   pc->ops->applyrichardson = 0;
615   pc->ops->setup           = PCSetUp_SPAI;
616   pc->ops->view            = PCView_SPAI;
617   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
618 
619   ispai->epsilon    = .4;
620   ispai->nbsteps    = 5;
621   ispai->max        = 5000;
622   ispai->maxnew     = 5;
623   ispai->block_size = 1;
624   ispai->cache_size = 5;
625   ispai->verbose    = 0;
626 
627   ispai->sp         = 1;
628   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
629 
630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
631                     "PCSPAISetEpsilon_SPAI",
632                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
634                     "PCSPAISetNBSteps_SPAI",
635                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
637                     "PCSPAISetMax_SPAI",
638                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
640                     "PCSPAISetMaxNew_SPAI",
641                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
643                     "PCSPAISetBlockSize_SPAI",
644                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
646                     "PCSPAISetCacheSize_SPAI",
647                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
649                     "PCSPAISetVerbose_SPAI",
650                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
652                     "PCSPAISetSp_SPAI",
653                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
654 
655   PetscFunctionReturn(0);
656 }
657 EXTERN_C_END
658 
659 /**********************************************************************/
660 
661 /*
662    Converts from a PETSc matrix to an SPAI matrix
663 */
664 #undef __FUNCT__
665 #define __FUNCT__ "ConvertMatToMatrix"
/*
   ConvertMatToMatrix - Converts a PETSc Mat A into SPAI's internal matrix
   format. AT supplies the column (transpose) nonzero structure; when the
   pattern is symmetric the caller passes A itself for AT. Ownership of the
   returned *B passes to the caller (freed with sp_free_matrix()).
*/
PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
{
  matrix                  *M;
  int                     i,j,col;
  int                     row_indx;
  int                     len,pe,local_indx,start_indx;
  int                     *mapping;
  PetscErrorCode          ierr;
  const int               *cols;
  const double            *vals;
  int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
  PetscMPIInt             size,rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);

  /*
    not sure why a barrier is required. commenting out
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  */

  M = new_matrix((SPAI_Comm)comm);

  /* scalar (1x1 block) representation of an n x n matrix */
  M->n = n;
  M->bs = 1;
  M->max_block_size = 1;

  /* NOTE(review): these malloc()s are unchecked; sp_free_matrix() is assumed
     to release them -- part of the known "memory bleeding" flagged at the top
     of this file */
  M->mnls          = (int*)malloc(sizeof(int)*size);
  M->start_indices = (int*)malloc(sizeof(int)*size);
  M->pe            = (int*)malloc(sizeof(int)*n);
  M->block_sizes   = (int*)malloc(sizeof(int)*n);
  for (i=0; i<n; i++) M->block_sizes[i] = 1;

  /* every rank learns the local row count of every other rank */
  ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);

  /* prefix sums of the local sizes give each rank's first global row */
  M->start_indices[0] = 0;
  for (i=1; i<size; i++) {
    M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
  }

  M->mnl = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* record the owning rank of every global row */
  for (i=0; i<size; i++) {
    start_indx = M->start_indices[i];
    for (j=0; j<M->mnls[i]; j++)
      M->pe[start_indx+j] = i;
  }

  /* second argument asks for column (transpose) storage only when AT given */
  /* NOTE(review): the CHKERRQ(ierr) here checks a stale ierr from the last
     PETSc call, not new_compressed_lines(), which does not set ierr */
  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
  pe         = 0;
  local_indx = 0;
  for (i=0; i<M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }


  ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);

  /*********************************************************/
  /************** Set up the row structure *****************/
  /*********************************************************/

  /* count number of nonzeros in every row */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  }

  /* NOTE(review): this max-length computation appears dead -- len is
     reassigned per row in the following loop before being used */
  /* allocate buffers */
  len = 0;
  for (i=0; i<mnl; i++) {
    if (len < num_ptr[i]) len = num_ptr[i];
  }

  /* allocate exactly-sized index and value buffers for every local row */
  for (i=rstart; i<rend; i++) {
    row_indx             = i-rstart;
    len                  = num_ptr[row_indx];
    rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
    rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
  }

  /* copy the matrix, translating global column indices through mapping[] */
  for (i=rstart; i<rend; i++) {
    row_indx = i - rstart;
    ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;
      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];
    ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
  }


  /************************************************************/
  /************** Set up the column structure *****************/
  /*********************************************************/

  if (AT) {

    /* count number of nonzeros in every column (= rows of the transpose) */
    for (i=rstart; i<rend; i++) {
      ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
    }

    /* NOTE(review): dead max-length computation, same as the row pass above */
    /* allocate buffers */
    len = 0;
    for (i=0; i<mnl; i++) {
      if (len < num_ptr[i]) len = num_ptr[i];
    }

    for (i=rstart; i<rend; i++) {
      row_indx = i-rstart;
      len      = num_ptr[row_indx];
      rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
    }

    /* copy the matrix (i.e., the structure) -- only indices, no values */
    for (i=rstart; i<rend; i++) {
      row_indx = i - rstart;
      ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
      for (j=0; j<nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;
        rows->rptrs[row_indx][len] = mapping[col];
      }
      ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
    }
  }

  ierr = PetscFree(num_ptr);CHKERRQ(ierr);
  ierr = PetscFree(mapping);CHKERRQ(ierr);

  /* SPAI post-processing: sort the index lists and record the max row length */
  order_pointers(M);
  M->maxnz = calc_maxnz(M);

  *B = M;

  PetscFunctionReturn(0);
}
829 
830 /**********************************************************************/
831 
832 /*
833    Converts from an SPAI matrix B  to a PETSc matrix PB.
834    This assumes that the the SPAI matrix B is stored in
835    COMPRESSED-ROW format.
836 */
837 #undef __FUNCT__
838 #define __FUNCT__ "ConvertMatrixToMat"
PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
{
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;
  int            m,n,M,N;
  int            d_nz,o_nz;
  int            *d_nnz,*o_nnz;
  int            i,k,global_row,global_col,first_diag_col,last_diag_col;
  PetscScalar    val;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  m = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateMPIAIJ: count per-row nonzeros
     in the diagonal block (columns owned by this rank) vs off-diagonal */
  ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col = first_diag_col + B->mnls[rank];
  for (i=0; i<B->mnls[rank]; i++) {
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  /* the scalar d_nz/o_nz of 0 are ignored when the nnz arrays are given;
     both Seq and MPI preallocations are set so either runtime type works */
  ierr = MatCreate(comm,PB);CHKERRQ(ierr);
  ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

  /* copy every locally stored entry; rows are dense in SPAI's compressed
     lines, so ADD_VALUES and INSERT_VALUES would behave the same here
     provided each entry appears once -- TODO confirm */
  for (i=0; i<B->mnls[rank]; i++) {
    global_row = B->start_indices[rank]+i;
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      val = B->lines->A[i][k];
      ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
    }
  }

  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = PetscFree(o_nnz);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
895 
896 /**********************************************************************/
897 
898 /*
899    Converts from an SPAI vector v  to a PETSc vec Pv.
900 */
901 #undef __FUNCT__
902 #define __FUNCT__ "ConvertVectorToVec"
903 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
904 {
905   PetscErrorCode ierr;
906   PetscMPIInt    size,rank;
907   int            m,M,i,*mnls,*start_indices,*global_indices;
908 
909   PetscFunctionBegin;
910   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
911   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
912 
913   m = v->mnl;
914   M = v->n;
915 
916 
917   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
918 
919   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
920   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
921 
922   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
923   start_indices[0] = 0;
924   for (i=1; i<size; i++)
925     start_indices[i] = start_indices[i-1] +mnls[i-1];
926 
927   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
928   for (i=0; i<v->mnl; i++)
929     global_indices[i] = start_indices[rank] + i;
930 
931   ierr = PetscFree(mnls);CHKERRQ(ierr);
932   ierr = PetscFree(start_indices);CHKERRQ(ierr);
933 
934   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
935   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
936   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
937 
938   ierr = PetscFree(global_indices);CHKERRQ(ierr);
939   PetscFunctionReturn(0);
940 }
941 
942 
943 
944 
945