xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 7b6bb2c608b6fc6714ef38fda02c2dbb91c82665)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char *,char *,char *);
37 
/*
   Private context for the SPAI (Sparse Approximate Inverse) preconditioner.
   Holds the SPAI-format matrices used during setup, the resulting
   approximate inverse in PETSc format, and the SPAI algorithm parameters
   (settable via PCSPAISetXXX()/options database).
*/
typedef struct {

  matrix   *B;              /* matrix in SPAI format */
  matrix   *BT;             /* transpose of matrix in SPAI format */
  matrix   *M;              /* the approximate inverse in SPAI format */

  Mat      PM;              /* the approximate inverse PETSc format */

  double   epsilon;         /* tolerance */
  int      nbsteps;         /* max number of "improvement" steps per line */
  int      max;             /* max dimensions of is_I, q, etc. */
  int      maxnew;          /* max number of new entries per step */
  int      block_size;      /* constant block size */
  int      cache_size;      /* one of (1,2,3,4,5,6) indicating size of cache */
  int      verbose;         /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;       /* communicator to be used with spai */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_SPAI"
/*
   PCSetUp_SPAI - Builds the SPAI approximate inverse for pc->pmat.

   Converts the PETSc matrix into SPAI's own format (using the transpose
   to supply the column structure when the nonzero pattern is not
   symmetric), calls bspai() to compute the approximate inverse M, and
   converts M back into a PETSc Mat (ispai->PM) that PCApply_SPAI()
   applies with MatMult().
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  Mat            AT;

  PetscFunctionBegin;

  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix itself provides the column structure */
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
  } else {
    /* Use the transpose to get the column nonzero structure. */
    ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
    ierr = MatDestroy(&AT);CHKERRQ(ierr);
  }

  /* Destroy the transpose */
  /* Don't know how to do it. PETSc developers? */

  /* construct SPAI preconditioner */
  /* FILE *messages */     /* file for warning messages */
  /* double epsilon */     /* tolerance */
  /* int nbsteps */        /* max number of "improvement" steps per line */
  /* int max */            /* max dimensions of is_I, q, etc. */
  /* int maxnew */         /* max number of new entries per step */
  /* int block_size */     /* block_size == 1 specifies scalar elments
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
                           /* cache_size == 0 indicates no caching */
  /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  ierr = bspai(ispai->B,&ispai->M,
		   stdout,
		   ispai->epsilon,
		   ispai->nbsteps,
		   ispai->max,
		   ispai->maxnew,
		   ispai->block_size,
		   ispai->cache_size,
	       ispai->verbose);CHKERRQ(ierr);

  ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);

  /* free the SPAI matrices; only ispai->PM is kept for the apply phase */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);

  PetscFunctionReturn(0);
}
116 
117 /**********************************************************************/
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCApply_SPAI"
121 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
122 {
123   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   /* Now using PETSc's multiply */
128   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 /**********************************************************************/
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "PCDestroy_SPAI"
136 static PetscErrorCode PCDestroy_SPAI(PC pc)
137 {
138   PetscErrorCode ierr;
139   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
140 
141   PetscFunctionBegin;
142   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
143   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
144   ierr = PetscFree(pc->data);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 /**********************************************************************/
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "PCView_SPAI"
152 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
153 {
154   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
155   PetscErrorCode ierr;
156   PetscBool      iascii;
157 
158   PetscFunctionBegin;
159   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
160   if (iascii) {
161     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
169     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 EXTERN_C_BEGIN
175 #undef __FUNCT__
176 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
177 PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
178 {
179   PC_SPAI *ispai = (PC_SPAI*)pc->data;
180   PetscFunctionBegin;
181   ispai->epsilon = epsilon1;
182   PetscFunctionReturn(0);
183 }
184 EXTERN_C_END
185 
186 /**********************************************************************/
187 
188 EXTERN_C_BEGIN
189 #undef __FUNCT__
190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
191 PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
192 {
193   PC_SPAI *ispai = (PC_SPAI*)pc->data;
194   PetscFunctionBegin;
195   ispai->nbsteps = nbsteps1;
196   PetscFunctionReturn(0);
197 }
198 EXTERN_C_END
199 
200 /**********************************************************************/
201 
202 /* added 1/7/99 g.h. */
203 EXTERN_C_BEGIN
204 #undef __FUNCT__
205 #define __FUNCT__ "PCSPAISetMax_SPAI"
206 PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
207 {
208   PC_SPAI *ispai = (PC_SPAI*)pc->data;
209   PetscFunctionBegin;
210   ispai->max = max1;
211   PetscFunctionReturn(0);
212 }
213 EXTERN_C_END
214 
215 /**********************************************************************/
216 
217 EXTERN_C_BEGIN
218 #undef __FUNCT__
219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
220 PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221 {
222   PC_SPAI *ispai = (PC_SPAI*)pc->data;
223   PetscFunctionBegin;
224   ispai->maxnew = maxnew1;
225   PetscFunctionReturn(0);
226 }
227 EXTERN_C_END
228 
229 /**********************************************************************/
230 
231 EXTERN_C_BEGIN
232 #undef __FUNCT__
233 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
234 PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
235 {
236   PC_SPAI *ispai = (PC_SPAI*)pc->data;
237   PetscFunctionBegin;
238   ispai->block_size = block_size1;
239   PetscFunctionReturn(0);
240 }
241 EXTERN_C_END
242 
243 /**********************************************************************/
244 
245 EXTERN_C_BEGIN
246 #undef __FUNCT__
247 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
248 PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
249 {
250   PC_SPAI *ispai = (PC_SPAI*)pc->data;
251   PetscFunctionBegin;
252   ispai->cache_size = cache_size;
253   PetscFunctionReturn(0);
254 }
255 EXTERN_C_END
256 
257 /**********************************************************************/
258 
259 EXTERN_C_BEGIN
260 #undef __FUNCT__
261 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
262 PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
263 {
264   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
265   PetscFunctionBegin;
266   ispai->verbose = verbose;
267   PetscFunctionReturn(0);
268 }
269 EXTERN_C_END
270 
271 /**********************************************************************/
272 
273 EXTERN_C_BEGIN
274 #undef __FUNCT__
275 #define __FUNCT__ "PCSPAISetSp_SPAI"
276 PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
277 {
278   PC_SPAI *ispai = (PC_SPAI*)pc->data;
279   PetscFunctionBegin;
280   ispai->sp = sp;
281   PetscFunctionReturn(0);
282 }
283 EXTERN_C_END
284 
285 /* -------------------------------------------------------------------*/
286 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- eps - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
  @*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetEpsilon_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
315 
316 /**********************************************************************/
317 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after ns steps, SPAI simply
                 uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetNBSteps_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
346 
347 /**********************************************************************/
348 
/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers (is_I, q, etc.) in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetMax_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
371 
372 /**********************************************************************/
373 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetMaxNew_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
395 
396 /**********************************************************************/
397 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - block size (default 1)

  Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetBlockSize_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
436 
437 /**********************************************************************/
438 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetCacheSize_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
464 
465 /**********************************************************************/
466 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - level (default 1)

  Notes: print parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetVerbose_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
489 
490 /**********************************************************************/
491 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp"
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - 0 or 1

  Notes: If A has a symmetric nonzero pattern use -sp 1 to
                 improve performance by eliminating some communication
                 in the parallel version. Even if A does not have a
                 symmetric nonzero pattern -sp 1 may well lead to good
                 results, but the code will not follow the published
                 SPAI algorithm exactly.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* dispatch to PCSPAISetSp_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
520 
521 /**********************************************************************/
522 
523 /**********************************************************************/
524 
525 #undef __FUNCT__
526 #define __FUNCT__ "PCSetFromOptions_SPAI"
527 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
528 {
529   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
530   PetscErrorCode ierr;
531   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
532   double         epsilon1;
533   PetscBool      flg;
534 
535   PetscFunctionBegin;
536   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
537     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
538     if (flg) {
539       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
540     }
541     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
542     if (flg) {
543       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
544     }
545     /* added 1/7/99 g.h. */
546     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
547     if (flg) {
548       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
549     }
550     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
551     if (flg) {
552       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
553     }
554     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
555     if (flg) {
556       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
557     }
558     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
559     if (flg) {
560       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
561     }
562     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
563     if (flg) {
564       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
565     }
566     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
567     if (flg) {
568       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
569     }
570   ierr = PetscOptionsTail();CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 /**********************************************************************/
575 
576 /*MC
577    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
578      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
579 
580    Options Database Keys:
581 +  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - set verbosity level
589 
590    Notes: This only works with AIJ matrices.
591 
592    Level: beginner
593 
594    Concepts: approximate inverse
595 
596 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
597     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
598     PCSPAISetVerbose(), PCSPAISetSp()
599 M*/
600 
601 EXTERN_C_BEGIN
602 #undef __FUNCT__
603 #define __FUNCT__ "PCCreate_SPAI"
604 PetscErrorCode  PCCreate_SPAI(PC pc)
605 {
606   PC_SPAI        *ispai;
607   PetscErrorCode ierr;
608 
609   PetscFunctionBegin;
610   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
611   pc->data           = ispai;
612 
613   pc->ops->destroy         = PCDestroy_SPAI;
614   pc->ops->apply           = PCApply_SPAI;
615   pc->ops->applyrichardson = 0;
616   pc->ops->setup           = PCSetUp_SPAI;
617   pc->ops->view            = PCView_SPAI;
618   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
619 
620   ispai->epsilon    = .4;
621   ispai->nbsteps    = 5;
622   ispai->max        = 5000;
623   ispai->maxnew     = 5;
624   ispai->block_size = 1;
625   ispai->cache_size = 5;
626   ispai->verbose    = 0;
627 
628   ispai->sp         = 1;
629   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
630 
631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
632                     "PCSPAISetEpsilon_SPAI",
633                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
635                     "PCSPAISetNBSteps_SPAI",
636                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
637   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
638                     "PCSPAISetMax_SPAI",
639                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
640   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
641                     "PCSPAISetMaxNew_SPAI",
642                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
643   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
644                     "PCSPAISetBlockSize_SPAI",
645                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
646   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
647                     "PCSPAISetCacheSize_SPAI",
648                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
649   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
650                     "PCSPAISetVerbose_SPAI",
651                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
652   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
653                     "PCSPAISetSp_SPAI",
654                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
655 
656   PetscFunctionReturn(0);
657 }
658 EXTERN_C_END
659 
660 /**********************************************************************/
661 
662 /*
663    Converts from a PETSc matrix to an SPAI matrix
664 */
665 #undef __FUNCT__
666 #define __FUNCT__ "ConvertMatToMatrix"
667 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
668 {
669   matrix                  *M;
670   int                     i,j,col;
671   int                     row_indx;
672   int                     len,pe,local_indx,start_indx;
673   int                     *mapping;
674   PetscErrorCode          ierr;
675   const int               *cols;
676   const double            *vals;
677   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
678   PetscMPIInt             size,rank;
679   struct compressed_lines *rows;
680 
681   PetscFunctionBegin;
682 
683   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
684   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
685   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
686   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
687 
688   /*
689     not sure why a barrier is required. commenting out
690   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
691   */
692 
693   M = new_matrix((SPAI_Comm)comm);
694 
695   M->n = n;
696   M->bs = 1;
697   M->max_block_size = 1;
698 
699   M->mnls          = (int*)malloc(sizeof(int)*size);
700   M->start_indices = (int*)malloc(sizeof(int)*size);
701   M->pe            = (int*)malloc(sizeof(int)*n);
702   M->block_sizes   = (int*)malloc(sizeof(int)*n);
703   for (i=0; i<n; i++) M->block_sizes[i] = 1;
704 
705   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
706 
707   M->start_indices[0] = 0;
708   for (i=1; i<size; i++) {
709     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
710   }
711 
712   M->mnl = M->mnls[M->myid];
713   M->my_start_index = M->start_indices[M->myid];
714 
715   for (i=0; i<size; i++) {
716     start_indx = M->start_indices[i];
717     for (j=0; j<M->mnls[i]; j++)
718       M->pe[start_indx+j] = i;
719   }
720 
721   if (AT) {
722     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
723   } else {
724     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
725   }
726 
727   rows = M->lines;
728 
729   /* Determine the mapping from global indices to pointers */
730   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
731   pe         = 0;
732   local_indx = 0;
733   for (i=0; i<M->n; i++) {
734     if (local_indx >= M->mnls[pe]) {
735       pe++;
736       local_indx = 0;
737     }
738     mapping[i] = local_indx + M->start_indices[pe];
739     local_indx++;
740   }
741 
742 
743   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
744 
745   /*********************************************************/
746   /************** Set up the row structure *****************/
747   /*********************************************************/
748 
749   /* count number of nonzeros in every row */
750   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
751   for (i=rstart; i<rend; i++) {
752     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
753     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
754   }
755 
756   /* allocate buffers */
757   len = 0;
758   for (i=0; i<mnl; i++) {
759     if (len < num_ptr[i]) len = num_ptr[i];
760   }
761 
762   for (i=rstart; i<rend; i++) {
763     row_indx             = i-rstart;
764     len                  = num_ptr[row_indx];
765     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
766     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
767   }
768 
769   /* copy the matrix */
770   for (i=rstart; i<rend; i++) {
771     row_indx = i - rstart;
772     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
773     for (j=0; j<nz; j++) {
774       col = cols[j];
775       len = rows->len[row_indx]++;
776       rows->ptrs[row_indx][len] = mapping[col];
777       rows->A[row_indx][len]    = vals[j];
778     }
779     rows->slen[row_indx] = rows->len[row_indx];
780     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
781   }
782 
783 
784   /************************************************************/
785   /************** Set up the column structure *****************/
786   /*********************************************************/
787 
788   if (AT) {
789 
790     /* count number of nonzeros in every column */
791     for (i=rstart; i<rend; i++) {
792       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
793       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
794     }
795 
796     /* allocate buffers */
797     len = 0;
798     for (i=0; i<mnl; i++) {
799       if (len < num_ptr[i]) len = num_ptr[i];
800     }
801 
802     for (i=rstart; i<rend; i++) {
803       row_indx = i-rstart;
804       len      = num_ptr[row_indx];
805       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
806     }
807 
808     /* copy the matrix (i.e., the structure) */
809     for (i=rstart; i<rend; i++) {
810       row_indx = i - rstart;
811       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
812       for (j=0; j<nz; j++) {
813 	col = cols[j];
814 	len = rows->rlen[row_indx]++;
815 	rows->rptrs[row_indx][len] = mapping[col];
816       }
817       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
818     }
819   }
820 
821   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
822   ierr = PetscFree(mapping);CHKERRQ(ierr);
823 
824   order_pointers(M);
825   M->maxnz = calc_maxnz(M);
826 
827   *B = M;
828 
829   PetscFunctionReturn(0);
830 }
831 
832 /**********************************************************************/
833 
834 /*
835    Converts from an SPAI matrix B  to a PETSc matrix PB.
836    This assumes that the the SPAI matrix B is stored in
837    COMPRESSED-ROW format.
838 */
839 #undef __FUNCT__
840 #define __FUNCT__ "ConvertMatrixToMat"
841 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
842 {
843   PetscMPIInt    size,rank;
844   PetscErrorCode ierr;
845   int            m,n,M,N;
846   int            d_nz,o_nz;
847   int            *d_nnz,*o_nnz;
848   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
849   PetscScalar    val;
850 
851   PetscFunctionBegin;
852   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
853   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
854 
855   m = n = B->mnls[rank];
856   d_nz = o_nz = 0;
857 
858   /* Determine preallocation for MatCreateMPIAIJ */
859   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
860   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
861   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
862   first_diag_col = B->start_indices[rank];
863   last_diag_col = first_diag_col + B->mnls[rank];
864   for (i=0; i<B->mnls[rank]; i++) {
865     for (k=0; k<B->lines->len[i]; k++) {
866       global_col = B->lines->ptrs[i][k];
867       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
868 	d_nnz[i]++;
869       else
870 	o_nnz[i]++;
871     }
872   }
873 
874   M = N = B->n;
875   /* Here we only know how to create AIJ format */
876   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
877   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
878   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
879   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
880   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
881 
882   for (i=0; i<B->mnls[rank]; i++) {
883     global_row = B->start_indices[rank]+i;
884     for (k=0; k<B->lines->len[i]; k++) {
885       global_col = B->lines->ptrs[i][k];
886       val = B->lines->A[i][k];
887       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
888     }
889   }
890 
891   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
892   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
893 
894   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
895   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896 
897   PetscFunctionReturn(0);
898 }
899 
900 /**********************************************************************/
901 
/*
   ConvertVectorToVec - Converts from an SPAI vector v to a PETSc Vec Pv.

   The new Vec has the same parallel layout as v (v->mnl local entries,
   v->n global entries); the caller destroys it with VecDestroy().
*/
#undef __FUNCT__
#define __FUNCT__ "ConvertVectorToVec"
PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  int            m,M,i,*mnls,*start_indices,*global_indices;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* local and global lengths taken directly from the SPAI vector */
  m = v->mnl;
  M = v->n;


  ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);

  /* gather every process's local length ... */
  ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
  ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);

  /* ... and prefix-sum them into each process's first global index */
  ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
  start_indices[0] = 0;
  for (i=1; i<size; i++)
    start_indices[i] = start_indices[i-1] +mnls[i-1];

  /* global index of every local entry */
  ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
  for (i=0; i<v->mnl; i++)
    global_indices[i] = start_indices[rank] + i;

  ierr = PetscFree(mnls);CHKERRQ(ierr);
  ierr = PetscFree(start_indices);CHKERRQ(ierr);

  ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);

  ierr = PetscFree(global_indices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
945 
946 
947 
948 
949