xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 009bbdc485cd9ad46be9940d3549e2dde9cdc322)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char *,char *,char *);
37 
/*
   Private context for the SPAI preconditioner.  Holds the SPAI-format
   matrices used while building the approximate inverse, the resulting
   PETSc matrix applied in PCApply_SPAI(), and the SPAI tuning parameters
   (settable via PCSPAISetXXX() or the -pc_spai_* options).
*/
typedef struct {

  matrix   *B;              /* matrix in SPAI format */
  matrix   *BT;             /* transpose of matrix in SPAI format */
  matrix   *M;              /* the approximate inverse in SPAI format */

  Mat      PM;              /* the approximate inverse in PETSc format */

  double   epsilon;         /* tolerance */
  int      nbsteps;         /* max number of "improvement" steps per line */
  int      max;             /* max dimensions of is_I, q, etc. */
  int      maxnew;          /* max number of new entries per step */
  int      block_size;      /* constant block size */
  int      cache_size;      /* one of (1,2,3,4,5,6) indicating size of cache */
  int      verbose;         /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;     /* communicator to be used with spai */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "PCSetUp_SPAI"
62 static PetscErrorCode PCSetUp_SPAI(PC pc)
63 {
64   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
65   PetscErrorCode ierr;
66   Mat            AT;
67 
68   PetscFunctionBegin;
69   init_SPAI();
70 
71   if (ispai->sp) {
72     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
73   } else {
74     /* Use the transpose to get the column nonzero structure. */
75     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
76     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
77     ierr = MatDestroy(&AT);CHKERRQ(ierr);
78   }
79 
80   /* Destroy the transpose */
81   /* Don't know how to do it. PETSc developers? */
82 
83   /* construct SPAI preconditioner */
84   /* FILE *messages */     /* file for warning messages */
85   /* double epsilon */     /* tolerance */
86   /* int nbsteps */        /* max number of "improvement" steps per line */
87   /* int max */            /* max dimensions of is_I, q, etc. */
88   /* int maxnew */         /* max number of new entries per step */
89   /* int block_size */     /* block_size == 1 specifies scalar elments
90                               block_size == n specifies nxn constant-block elements
91                               block_size == 0 specifies variable-block elements */
92   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
93                            /* cache_size == 0 indicates no caching */
94   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
95                               verbose == 1 prints timing and matrix statistics */
96 
97   ierr = bspai(ispai->B,&ispai->M,
98                stdout,
99                ispai->epsilon,
100                ispai->nbsteps,
101                ispai->max,
102                ispai->maxnew,
103                ispai->block_size,
104                ispai->cache_size,
105                ispai->verbose);CHKERRQ(ierr);
106 
107   ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
108 
109   /* free the SPAI matrices */
110   sp_free_matrix(ispai->B);
111   sp_free_matrix(ispai->M);
112   PetscFunctionReturn(0);
113 }
114 
115 /**********************************************************************/
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "PCApply_SPAI"
119 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
120 {
121   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   /* Now using PETSc's multiply */
126   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
127   PetscFunctionReturn(0);
128 }
129 
130 /**********************************************************************/
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "PCDestroy_SPAI"
134 static PetscErrorCode PCDestroy_SPAI(PC pc)
135 {
136   PetscErrorCode ierr;
137   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
138 
139   PetscFunctionBegin;
140   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
141   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
142   ierr = PetscFree(pc->data);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 /**********************************************************************/
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "PCView_SPAI"
150 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
151 {
152   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
153   PetscErrorCode ierr;
154   PetscBool      iascii;
155 
156   PetscFunctionBegin;
157   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
158   if (iascii) {
159     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
160     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 EXTERN_C_BEGIN
173 #undef __FUNCT__
174 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
175 PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
176 {
177   PC_SPAI *ispai = (PC_SPAI*)pc->data;
178 
179   PetscFunctionBegin;
180   ispai->epsilon = epsilon1;
181   PetscFunctionReturn(0);
182 }
183 EXTERN_C_END
184 
185 /**********************************************************************/
186 
187 EXTERN_C_BEGIN
188 #undef __FUNCT__
189 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
190 PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
191 {
192   PC_SPAI *ispai = (PC_SPAI*)pc->data;
193 
194   PetscFunctionBegin;
195   ispai->nbsteps = nbsteps1;
196   PetscFunctionReturn(0);
197 }
198 EXTERN_C_END
199 
200 /**********************************************************************/
201 
202 /* added 1/7/99 g.h. */
203 EXTERN_C_BEGIN
204 #undef __FUNCT__
205 #define __FUNCT__ "PCSPAISetMax_SPAI"
206 PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
207 {
208   PC_SPAI *ispai = (PC_SPAI*)pc->data;
209 
210   PetscFunctionBegin;
211   ispai->max = max1;
212   PetscFunctionReturn(0);
213 }
214 EXTERN_C_END
215 
216 /**********************************************************************/
217 
218 EXTERN_C_BEGIN
219 #undef __FUNCT__
220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
221 PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
222 {
223   PC_SPAI *ispai = (PC_SPAI*)pc->data;
224 
225   PetscFunctionBegin;
226   ispai->maxnew = maxnew1;
227   PetscFunctionReturn(0);
228 }
229 EXTERN_C_END
230 
231 /**********************************************************************/
232 
233 EXTERN_C_BEGIN
234 #undef __FUNCT__
235 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
236 PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
237 {
238   PC_SPAI *ispai = (PC_SPAI*)pc->data;
239 
240   PetscFunctionBegin;
241   ispai->block_size = block_size1;
242   PetscFunctionReturn(0);
243 }
244 EXTERN_C_END
245 
246 /**********************************************************************/
247 
248 EXTERN_C_BEGIN
249 #undef __FUNCT__
250 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
251 PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
252 {
253   PC_SPAI *ispai = (PC_SPAI*)pc->data;
254 
255   PetscFunctionBegin;
256   ispai->cache_size = cache_size;
257   PetscFunctionReturn(0);
258 }
259 EXTERN_C_END
260 
261 /**********************************************************************/
262 
263 EXTERN_C_BEGIN
264 #undef __FUNCT__
265 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
266 PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
267 {
268   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
269 
270   PetscFunctionBegin;
271   ispai->verbose = verbose;
272   PetscFunctionReturn(0);
273 }
274 EXTERN_C_END
275 
276 /**********************************************************************/
277 
278 EXTERN_C_BEGIN
279 #undef __FUNCT__
280 #define __FUNCT__ "PCSPAISetSp_SPAI"
281 PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
282 {
283   PC_SPAI *ispai = (PC_SPAI*)pc->data;
284 
285   PetscFunctionBegin;
286   ispai->sp = sp;
287   PetscFunctionReturn(0);
288 }
289 EXTERN_C_END
290 
291 /* -------------------------------------------------------------------*/
292 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- epsilon1 - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
322 
323 /**********************************************************************/
324 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- nbsteps1 - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after the given number of
                 steps, SPAI simply uses the best approximation
                 constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
354 
355 /**********************************************************************/
356 
/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- max1 - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
380 
381 /**********************************************************************/
382 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
405 
406 /**********************************************************************/
407 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- block_size1 - block size (default 1)

  Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
447 
448 /**********************************************************************/
449 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- cache_size -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
476 
477 /**********************************************************************/
478 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- verbose - level (default 0, i.e. silent)

  Notes: nonzero prints parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
502 
503 /**********************************************************************/
504 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp"
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Notes: If A has a symmetric nonzero pattern use -sp 1 to
                 improve performance by eliminating some communication
                 in the parallel version. Even if A does not have a
                 symmetric nonzero pattern -sp 1 may well lead to good
                 results, but the code will not follow the published
                 SPAI algorithm exactly.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
534 
535 /**********************************************************************/
536 
537 /**********************************************************************/
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "PCSetFromOptions_SPAI"
541 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
542 {
543   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
544   PetscErrorCode ierr;
545   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
546   double         epsilon1;
547   PetscBool      flg;
548 
549   PetscFunctionBegin;
550   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
551     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
552     if (flg) {
553       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
554     }
555     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
556     if (flg) {
557       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
558     }
559     /* added 1/7/99 g.h. */
560     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
561     if (flg) {
562       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
563     }
564     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
565     if (flg) {
566       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
567     }
568     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
569     if (flg) {
570       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
571     }
572     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
573     if (flg) {
574       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
575     }
576     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
577     if (flg) {
578       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
579     }
580     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
581     if (flg) {
582       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
583     }
584   ierr = PetscOptionsTail();CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 /**********************************************************************/
589 
590 /*MC
591    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
592      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
593 
   Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - verbose output (0 = silent, 1 = print statistics)
603 
604    Notes: This only works with AIJ matrices.
605 
606    Level: beginner
607 
608    Concepts: approximate inverse
609 
610 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
611     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
612     PCSPAISetVerbose(), PCSPAISetSp()
613 M*/
614 
615 EXTERN_C_BEGIN
616 #undef __FUNCT__
617 #define __FUNCT__ "PCCreate_SPAI"
618 PetscErrorCode  PCCreate_SPAI(PC pc)
619 {
620   PC_SPAI        *ispai;
621   PetscErrorCode ierr;
622 
623   PetscFunctionBegin;
624   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
625   pc->data           = ispai;
626 
627   pc->ops->destroy         = PCDestroy_SPAI;
628   pc->ops->apply           = PCApply_SPAI;
629   pc->ops->applyrichardson = 0;
630   pc->ops->setup           = PCSetUp_SPAI;
631   pc->ops->view            = PCView_SPAI;
632   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
633 
634   ispai->epsilon    = .4;
635   ispai->nbsteps    = 5;
636   ispai->max        = 5000;
637   ispai->maxnew     = 5;
638   ispai->block_size = 1;
639   ispai->cache_size = 5;
640   ispai->verbose    = 0;
641 
642   ispai->sp         = 1;
643   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
644 
645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
646                     "PCSPAISetEpsilon_SPAI",
647                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
649                     "PCSPAISetNBSteps_SPAI",
650                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
652                     "PCSPAISetMax_SPAI",
653                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
655                     "PCSPAISetMaxNew_SPAI",
656                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
658                     "PCSPAISetBlockSize_SPAI",
659                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
661                     "PCSPAISetCacheSize_SPAI",
662                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
664                     "PCSPAISetVerbose_SPAI",
665                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
667                     "PCSPAISetSp_SPAI",
668                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 EXTERN_C_END
672 
673 /**********************************************************************/
674 
675 /*
676    Converts from a PETSc matrix to an SPAI matrix
677 */
#undef __FUNCT__
#define __FUNCT__ "ConvertMatToMatrix"
/*
   ConvertMatToMatrix - Converts a PETSc matrix A into a newly allocated SPAI
   "matrix" structure B.

   Input Parameters:
+  comm - communicator for the SPAI matrix (assumed to match A's layout)
.  A    - the PETSc matrix whose rows are copied
-  AT   - matrix supplying the column nonzero structure: the transpose of A,
          or A itself when the nonzero pattern is symmetric; if NULL no
          column structure is built

   Output Parameter:
.  B - the SPAI matrix, allocated here (partly with SPAI's own malloc-based
       constructors); the caller releases it with sp_free_matrix()
*/
PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
{
  matrix                  *M;
  int                     i,j,col;
  int                     row_indx;
  int                     len,pe,local_indx,start_indx;
  int                     *mapping;
  PetscErrorCode          ierr;
  const int               *cols;
  const double            *vals;
  int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
  PetscMPIInt             size,rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);

  /*
    not sure why a barrier is required. commenting out
  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  */

  M = new_matrix((SPAI_Comm)comm);

  /* scalar (non-blocked) storage: every "block" is 1x1 */
  M->n = n;
  M->bs = 1;
  M->max_block_size = 1;

  /* per-process local sizes and starting offsets, plus owner and block size
     of every global row (all blocks are size 1 here) */
  M->mnls          = (int*)malloc(sizeof(int)*size);
  M->start_indices = (int*)malloc(sizeof(int)*size);
  M->pe            = (int*)malloc(sizeof(int)*n);
  M->block_sizes   = (int*)malloc(sizeof(int)*n);
  for (i=0; i<n; i++) M->block_sizes[i] = 1;

  ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);

  /* prefix-sum of local sizes gives each process's first global row */
  M->start_indices[0] = 0;
  for (i=1; i<size; i++) {
    M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
  }

  M->mnl = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  /* record which process owns every global row */
  for (i=0; i<size; i++) {
    start_indx = M->start_indices[i];
    for (j=0; j<M->mnls[i]; j++)
      M->pe[start_indx+j] = i;
  }

  /* second argument selects whether column (transpose) structure is stored */
  /* NOTE(review): the CHKERRQ(ierr) on these two lines checks a stale ierr;
     new_compressed_lines() is an SPAI call and does not set ierr */
  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
  pe         = 0;
  local_indx = 0;
  for (i=0; i<M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }


  ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);

  /*********************************************************/
  /************** Set up the row structure *****************/
  /*********************************************************/

  /* count number of nonzeros in every row */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  }

  /* allocate buffers */
  /* NOTE(review): this max-length computation is immediately overwritten by
     the per-row len below; it has no effect */
  len = 0;
  for (i=0; i<mnl; i++) {
    if (len < num_ptr[i]) len = num_ptr[i];
  }

  /* one exactly-sized index and value buffer per local row */
  for (i=rstart; i<rend; i++) {
    row_indx             = i-rstart;
    len                  = num_ptr[row_indx];
    rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
    rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
  }

  /* copy the matrix */
  for (i=rstart; i<rend; i++) {
    row_indx = i - rstart;
    ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;
      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];
    ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
  }


  /************************************************************/
  /************** Set up the column structure *****************/
  /*********************************************************/

  /* the rows of AT give the column index lists of A (structure only, no values) */
  if (AT) {

    /* count number of nonzeros in every column */
    for (i=rstart; i<rend; i++) {
      ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
    }

    /* allocate buffers */
    len = 0;
    for (i=0; i<mnl; i++) {
      if (len < num_ptr[i]) len = num_ptr[i];
    }

    for (i=rstart; i<rend; i++) {
      row_indx = i-rstart;
      len      = num_ptr[row_indx];
      rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
    }

    /* copy the matrix (i.e., the structure) */
    for (i=rstart; i<rend; i++) {
      row_indx = i - rstart;
      ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
      for (j=0; j<nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;
        rows->rptrs[row_indx][len] = mapping[col];
      }
      ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
    }
  }

  ierr = PetscFree(num_ptr);CHKERRQ(ierr);
  ierr = PetscFree(mapping);CHKERRQ(ierr);

  /* finalize the SPAI structure: sort index lists and record the max row length */
  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B = M;
  PetscFunctionReturn(0);
}
841 
842 /**********************************************************************/
843 
844 /*
845    Converts from an SPAI matrix B  to a PETSc matrix PB.
846    This assumes that the the SPAI matrix B is stored in
847    COMPRESSED-ROW format.
848 */
849 #undef __FUNCT__
850 #define __FUNCT__ "ConvertMatrixToMat"
851 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
852 {
853   PetscMPIInt    size,rank;
854   PetscErrorCode ierr;
855   int            m,n,M,N;
856   int            d_nz,o_nz;
857   int            *d_nnz,*o_nnz;
858   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
859   PetscScalar    val;
860 
861   PetscFunctionBegin;
862   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
863   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
864 
865   m = n = B->mnls[rank];
866   d_nz = o_nz = 0;
867 
868   /* Determine preallocation for MatCreateMPIAIJ */
869   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
870   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
871   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
872   first_diag_col = B->start_indices[rank];
873   last_diag_col = first_diag_col + B->mnls[rank];
874   for (i=0; i<B->mnls[rank]; i++) {
875     for (k=0; k<B->lines->len[i]; k++) {
876       global_col = B->lines->ptrs[i][k];
877       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
878       else o_nnz[i]++;
879     }
880   }
881 
882   M = N = B->n;
883   /* Here we only know how to create AIJ format */
884   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
885   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
886   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
887   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
888   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
889 
890   for (i=0; i<B->mnls[rank]; i++) {
891     global_row = B->start_indices[rank]+i;
892     for (k=0; k<B->lines->len[i]; k++) {
893       global_col = B->lines->ptrs[i][k];
894       val = B->lines->A[i][k];
895       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
896     }
897   }
898 
899   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
900   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
901 
902   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
903   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 /**********************************************************************/
908 
909 /*
910    Converts from an SPAI vector v  to a PETSc vec Pv.
911 */
912 #undef __FUNCT__
913 #define __FUNCT__ "ConvertVectorToVec"
914 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
915 {
916   PetscErrorCode ierr;
917   PetscMPIInt    size,rank;
918   int            m,M,i,*mnls,*start_indices,*global_indices;
919 
920   PetscFunctionBegin;
921   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
922   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
923 
924   m = v->mnl;
925   M = v->n;
926 
927 
928   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
929 
930   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
931   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
932 
933   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
934   start_indices[0] = 0;
935   for (i=1; i<size; i++)
936     start_indices[i] = start_indices[i-1] +mnls[i-1];
937 
938   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
939   for (i=0; i<v->mnl; i++)
940     global_indices[i] = start_indices[rank] + i;
941 
942   ierr = PetscFree(mnls);CHKERRQ(ierr);
943   ierr = PetscFree(start_indices);CHKERRQ(ierr);
944 
945   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
946   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
947   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
948 
949   ierr = PetscFree(global_indices);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 
954 
955 
956