xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCKSP_DLL
2 
3 /*
4    3/99 Modified by Stephen Barnard to support SPAI version 3.0
5 */
6 
7 /*
8       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9    Code written by Stephen Barnard.
10 
11       Note: there is some BAD memory bleeding below!
12 
13       This code needs work
14 
15    1) get rid of all memory bleeding
16    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
17       rather than having the sp flag for PC_SPAI
18    3) fix to set the block size based on the matrix block size
19 
20 */
21 
22 #include "private/pcimpl.h"        /*I "petscpc.h" I*/
23 #include "petscspai.h"
24 
25 /*
26     These are the SPAI include files
27 */
28 EXTERN_C_BEGIN
29 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
30 #include "spai.h"
31 #include "matrix.h"
32 EXTERN_C_END
33 
34 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
35 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
36 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
37 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);
38 
39 typedef struct {
40 
41   matrix   *B;              /* matrix in SPAI format */
42   matrix   *BT;             /* transpose of matrix in SPAI format */
43   matrix   *M;              /* the approximate inverse in SPAI format */
44 
45   Mat      PM;              /* the approximate inverse PETSc format */
46 
47   double   epsilon;         /* tolerance */
48   int      nbsteps;         /* max number of "improvement" steps per line */
49   int      max;             /* max dimensions of is_I, q, etc. */
50   int      maxnew;          /* max number of new entries per step */
51   int      block_size;      /* constant block size */
52   int      cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
53   int      verbose;         /* SPAI prints timing and statistics */
54 
55   int      sp;              /* symmetric nonzero pattern */
56   MPI_Comm comm_spai;     /* communicator to be used with spai */
57 } PC_SPAI;
58 
59 /**********************************************************************/
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "PCSetUp_SPAI"
63 static PetscErrorCode PCSetUp_SPAI(PC pc)
64 {
65   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
66   PetscErrorCode ierr;
67   Mat            AT;
68 
69   PetscFunctionBegin;
70 
71   init_SPAI();
72 
73   if (ispai->sp) {
74     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
75   } else {
76     /* Use the transpose to get the column nonzero structure. */
77     ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr);
78     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
79     ierr = MatDestroy(AT);CHKERRQ(ierr);
80   }
81 
82   /* Destroy the transpose */
83   /* Don't know how to do it. PETSc developers? */
84 
85   /* construct SPAI preconditioner */
86   /* FILE *messages */     /* file for warning messages */
87   /* double epsilon */     /* tolerance */
88   /* int nbsteps */        /* max number of "improvement" steps per line */
89   /* int max */            /* max dimensions of is_I, q, etc. */
90   /* int maxnew */         /* max number of new entries per step */
91   /* int block_size */     /* block_size == 1 specifies scalar elments
92                               block_size == n specifies nxn constant-block elements
93                               block_size == 0 specifies variable-block elements */
94   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
95                            /* cache_size == 0 indicates no caching */
96   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
97                               verbose == 1 prints timing and matrix statistics */
98 
99   ierr = bspai(ispai->B,&ispai->M,
100 		   stdout,
101 		   ispai->epsilon,
102 		   ispai->nbsteps,
103 		   ispai->max,
104 		   ispai->maxnew,
105 		   ispai->block_size,
106 		   ispai->cache_size,
107 	       ispai->verbose); CHKERRQ(ierr);
108 
109   ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
110 
111   /* free the SPAI matrices */
112   sp_free_matrix(ispai->B);
113   sp_free_matrix(ispai->M);
114 
115   PetscFunctionReturn(0);
116 }
117 
118 /**********************************************************************/
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "PCApply_SPAI"
122 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
123 {
124   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
125   PetscErrorCode ierr;
126 
127   PetscFunctionBegin;
128   /* Now using PETSc's multiply */
129   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 /**********************************************************************/
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "PCDestroy_SPAI"
137 static PetscErrorCode PCDestroy_SPAI(PC pc)
138 {
139   PetscErrorCode ierr;
140   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
141 
142   PetscFunctionBegin;
143   if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);}
144   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
145   ierr = PetscFree(ispai);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 /**********************************************************************/
150 
151 #undef __FUNCT__
152 #define __FUNCT__ "PCView_SPAI"
153 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
154 {
155   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
156   PetscErrorCode ierr;
157   PetscTruth     iascii;
158 
159   PetscFunctionBegin;
160   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
161   if (iascii) {
162     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
169     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
170     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 EXTERN_C_BEGIN
176 #undef __FUNCT__
177 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
178 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
179 {
180   PC_SPAI *ispai = (PC_SPAI*)pc->data;
181   PetscFunctionBegin;
182   ispai->epsilon = epsilon1;
183   PetscFunctionReturn(0);
184 }
185 EXTERN_C_END
186 
187 /**********************************************************************/
188 
189 EXTERN_C_BEGIN
190 #undef __FUNCT__
191 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
192 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
193 {
194   PC_SPAI *ispai = (PC_SPAI*)pc->data;
195   PetscFunctionBegin;
196   ispai->nbsteps = nbsteps1;
197   PetscFunctionReturn(0);
198 }
199 EXTERN_C_END
200 
201 /**********************************************************************/
202 
203 /* added 1/7/99 g.h. */
204 EXTERN_C_BEGIN
205 #undef __FUNCT__
206 #define __FUNCT__ "PCSPAISetMax_SPAI"
207 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1)
208 {
209   PC_SPAI *ispai = (PC_SPAI*)pc->data;
210   PetscFunctionBegin;
211   ispai->max = max1;
212   PetscFunctionReturn(0);
213 }
214 EXTERN_C_END
215 
216 /**********************************************************************/
217 
218 EXTERN_C_BEGIN
219 #undef __FUNCT__
220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
221 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
222 {
223   PC_SPAI *ispai = (PC_SPAI*)pc->data;
224   PetscFunctionBegin;
225   ispai->maxnew = maxnew1;
226   PetscFunctionReturn(0);
227 }
228 EXTERN_C_END
229 
230 /**********************************************************************/
231 
232 EXTERN_C_BEGIN
233 #undef __FUNCT__
234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
235 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
236 {
237   PC_SPAI *ispai = (PC_SPAI*)pc->data;
238   PetscFunctionBegin;
239   ispai->block_size = block_size1;
240   PetscFunctionReturn(0);
241 }
242 EXTERN_C_END
243 
244 /**********************************************************************/
245 
246 EXTERN_C_BEGIN
247 #undef __FUNCT__
248 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
249 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
250 {
251   PC_SPAI *ispai = (PC_SPAI*)pc->data;
252   PetscFunctionBegin;
253   ispai->cache_size = cache_size;
254   PetscFunctionReturn(0);
255 }
256 EXTERN_C_END
257 
258 /**********************************************************************/
259 
260 EXTERN_C_BEGIN
261 #undef __FUNCT__
262 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
263 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose)
264 {
265   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
266   PetscFunctionBegin;
267   ispai->verbose = verbose;
268   PetscFunctionReturn(0);
269 }
270 EXTERN_C_END
271 
272 /**********************************************************************/
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "PCSPAISetSp_SPAI"
277 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp)
278 {
279   PC_SPAI *ispai = (PC_SPAI*)pc->data;
280   PetscFunctionBegin;
281   ispai->sp = sp;
282   PetscFunctionReturn(0);
283 }
284 EXTERN_C_END
285 
286 /* -------------------------------------------------------------------*/
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "PCSPAISetEpsilon"
290 /*@
291   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
292 
293   Input Parameters:
294 + pc - the preconditioner
295 - eps - epsilon (default .4)
296 
297   Notes:  Espilon must be between 0 and 1. It controls the
298                  quality of the approximation of M to the inverse of
299                  A. Higher values of epsilon lead to more work, more
300                  fill, and usually better preconditioners. In many
301                  cases the best choice of epsilon is the one that
302                  divides the total solution time equally between the
303                  preconditioner and the solver.
304 
305   Level: intermediate
306 
307 .seealso: PCSPAI, PCSetType()
308   @*/
309 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1)
310 {
311   PetscErrorCode ierr,(*f)(PC,double);
312   PetscFunctionBegin;
313   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr);
314   if (f) {
315     ierr = (*f)(pc,epsilon1);CHKERRQ(ierr);
316   }
317   PetscFunctionReturn(0);
318 }
319 
320 /**********************************************************************/
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "PCSPAISetNBSteps"
324 /*@
325   PCSPAISetNBSteps - set maximum number of improvement steps per row in
326         the SPAI preconditioner
327 
328   Input Parameters:
329 + pc - the preconditioner
330 - n - number of steps (default 5)
331 
332   Notes:  SPAI constructs to approximation to every column of
333                  the exact inverse of A in a series of improvement
334                  steps. The quality of the approximation is determined
335                  by epsilon. If an approximation achieving an accuracy
336                  of epsilon is not obtained after ns steps, SPAI simply
337                  uses the best approximation constructed so far.
338 
339   Level: intermediate
340 
341 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
342 @*/
343 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1)
344 {
345   PetscErrorCode ierr,(*f)(PC,int);
346   PetscFunctionBegin;
347   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr);
348   if (f) {
349     ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr);
350   }
351   PetscFunctionReturn(0);
352 }
353 
354 /**********************************************************************/
355 
356 /* added 1/7/99 g.h. */
357 #undef __FUNCT__
358 #define __FUNCT__ "PCSPAISetMax"
359 /*@
360   PCSPAISetMax - set the size of various working buffers in
361         the SPAI preconditioner
362 
363   Input Parameters:
364 + pc - the preconditioner
365 - n - size (default is 5000)
366 
367   Level: intermediate
368 
369 .seealso: PCSPAI, PCSetType()
370 @*/
371 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1)
372 {
373   PetscErrorCode ierr,(*f)(PC,int);
374   PetscFunctionBegin;
375   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr);
376   if (f) {
377     ierr = (*f)(pc,max1);CHKERRQ(ierr);
378   }
379   PetscFunctionReturn(0);
380 }
381 
382 /**********************************************************************/
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "PCSPAISetMaxNew"
386 /*@
387   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
388    in SPAI preconditioner
389 
390   Input Parameters:
391 + pc - the preconditioner
392 - n - maximum number (default 5)
393 
394   Level: intermediate
395 
396 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
397 @*/
398 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1)
399 {
400   PetscErrorCode ierr,(*f)(PC,int);
401   PetscFunctionBegin;
402   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr);
403   if (f) {
404     ierr = (*f)(pc,maxnew1);CHKERRQ(ierr);
405   }
406   PetscFunctionReturn(0);
407 }
408 
409 /**********************************************************************/
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "PCSPAISetBlockSize"
413 /*@
414   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
415 
416   Input Parameters:
417 + pc - the preconditioner
418 - n - block size (default 1)
419 
420   Notes: A block
421                  size of 1 treats A as a matrix of scalar elements. A
422                  block size of s > 1 treats A as a matrix of sxs
423                  blocks. A block size of 0 treats A as a matrix with
424                  variable sized blocks, which are determined by
425                  searching for dense square diagonal blocks in A.
426                  This can be very effective for finite-element
427                  matrices.
428 
429                  SPAI will convert A to block form, use a block
430                  version of the preconditioner algorithm, and then
431                  convert the result back to scalar form.
432 
433                  In many cases the a block-size parameter other than 1
434                  can lead to very significant improvement in
435                  performance.
436 
437 
438   Level: intermediate
439 
440 .seealso: PCSPAI, PCSetType()
441 @*/
442 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1)
443 {
444   PetscErrorCode ierr,(*f)(PC,int);
445   PetscFunctionBegin;
446   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr);
447   if (f) {
448     ierr = (*f)(pc,block_size1);CHKERRQ(ierr);
449   }
450   PetscFunctionReturn(0);
451 }
452 
453 /**********************************************************************/
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "PCSPAISetCacheSize"
457 /*@
458   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
459 
460   Input Parameters:
461 + pc - the preconditioner
462 - n -  cache size {0,1,2,3,4,5} (default 5)
463 
464   Notes:    SPAI uses a hash table to cache messages and avoid
465                  redundant communication. If suggest always using
466                  5. This parameter is irrelevant in the serial
467                  version.
468 
469   Level: intermediate
470 
471 .seealso: PCSPAI, PCSetType()
472 @*/
473 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size)
474 {
475   PetscErrorCode ierr,(*f)(PC,int);
476   PetscFunctionBegin;
477   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr);
478   if (f) {
479     ierr = (*f)(pc,cache_size);CHKERRQ(ierr);
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 /**********************************************************************/
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "PCSPAISetVerbose"
488 /*@
489   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
490 
491   Input Parameters:
492 + pc - the preconditioner
493 - n - level (default 1)
494 
495   Notes: print parameters, timings and matrix statistics
496 
497   Level: intermediate
498 
499 .seealso: PCSPAI, PCSetType()
500 @*/
501 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose)
502 {
503   PetscErrorCode ierr,(*f)(PC,int);
504   PetscFunctionBegin;
505   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr);
506   if (f) {
507     ierr = (*f)(pc,verbose);CHKERRQ(ierr);
508   }
509   PetscFunctionReturn(0);
510 }
511 
512 /**********************************************************************/
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "PCSPAISetSp"
516 /*@
517   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
518 
519   Input Parameters:
520 + pc - the preconditioner
521 - n - 0 or 1
522 
523   Notes: If A has a symmetric nonzero pattern use -sp 1 to
524                  improve performance by eliminating some communication
525                  in the parallel version. Even if A does not have a
526                  symmetric nonzero pattern -sp 1 may well lead to good
527                  results, but the code will not follow the published
528                  SPAI algorithm exactly.
529 
530 
531   Level: intermediate
532 
533 .seealso: PCSPAI, PCSetType()
534 @*/
535 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp)
536 {
537   PetscErrorCode ierr,(*f)(PC,int);
538   PetscFunctionBegin;
539   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr);
540   if (f) {
541     ierr = (*f)(pc,sp);CHKERRQ(ierr);
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 /**********************************************************************/
547 
548 /**********************************************************************/
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "PCSetFromOptions_SPAI"
552 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
553 {
554   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
555   PetscErrorCode ierr;
556   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
557   double         epsilon1;
558   PetscTruth     flg;
559 
560   PetscFunctionBegin;
561   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
562     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
563     if (flg) {
564       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
565     }
566     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
567     if (flg) {
568       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
569     }
570     /* added 1/7/99 g.h. */
571     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
572     if (flg) {
573       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
574     }
575     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
576     if (flg) {
577       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
578     }
579     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
580     if (flg) {
581       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
582     }
583     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
584     if (flg) {
585       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
586     }
587     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
588     if (flg) {
589       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
590     }
591     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
592     if (flg) {
593       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
594     }
595   ierr = PetscOptionsTail();CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 /**********************************************************************/
600 
601 /*MC
602    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
603      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
604 
605    Options Database Keys:
606 +  -pc_spai_set_epsilon <eps> - set tolerance
607 .  -pc_spai_nbstep <n> - set nbsteps
608 .  -pc_spai_max <m> - set max
609 .  -pc_spai_max_new <m> - set maxnew
610 .  -pc_spai_block_size <n> - set block size
611 .  -pc_spai_cache_size <n> - set cache size
612 .  -pc_spai_sp <m> - set sp
613 -  -pc_spai_set_verbose <true,false> - verbose output
614 
615    Notes: This only works with AIJ matrices.
616 
617    Level: beginner
618 
619    Concepts: approximate inverse
620 
621 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
622     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
623     PCSPAISetVerbose(), PCSPAISetSp()
624 M*/
625 
626 EXTERN_C_BEGIN
627 #undef __FUNCT__
628 #define __FUNCT__ "PCCreate_SPAI"
629 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc)
630 {
631   PC_SPAI        *ispai;
632   PetscErrorCode ierr;
633 
634   PetscFunctionBegin;
635   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
636   pc->data           = ispai;
637 
638   pc->ops->destroy         = PCDestroy_SPAI;
639   pc->ops->apply           = PCApply_SPAI;
640   pc->ops->applyrichardson = 0;
641   pc->ops->setup           = PCSetUp_SPAI;
642   pc->ops->view            = PCView_SPAI;
643   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
644 
645   ispai->epsilon    = .4;
646   ispai->nbsteps    = 5;
647   ispai->max        = 5000;
648   ispai->maxnew     = 5;
649   ispai->block_size = 1;
650   ispai->cache_size = 5;
651   ispai->verbose    = 0;
652 
653   ispai->sp         = 1;
654   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
655 
656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
657                     "PCSPAISetEpsilon_SPAI",
658                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
660                     "PCSPAISetNBSteps_SPAI",
661                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
662   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
663                     "PCSPAISetMax_SPAI",
664                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
665   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
666                     "PCSPAISetMaxNew_SPAI",
667                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
669                     "PCSPAISetBlockSize_SPAI",
670                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
671   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
672                     "PCSPAISetCacheSize_SPAI",
673                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
675                     "PCSPAISetVerbose_SPAI",
676                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
677   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
678                     "PCSPAISetSp_SPAI",
679                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
680 
681   PetscFunctionReturn(0);
682 }
683 EXTERN_C_END
684 
685 /**********************************************************************/
686 
687 /*
688    Converts from a PETSc matrix to an SPAI matrix
689 */
690 #undef __FUNCT__
691 #define __FUNCT__ "ConvertMatToMatrix"
692 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
693 {
694   matrix                  *M;
695   int                     i,j,col;
696   int                     row_indx;
697   int                     len,pe,local_indx,start_indx;
698   int                     *mapping;
699   PetscErrorCode          ierr;
700   const int               *cols;
701   const double            *vals;
702   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
703   PetscMPIInt             size,rank;
704   struct compressed_lines *rows;
705 
706   PetscFunctionBegin;
707 
708   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
710   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
711   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
712 
713   /*
714     not sure why a barrier is required. commenting out
715   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
716   */
717 
718   M = new_matrix((SPAI_Comm)comm);
719 
720   M->n = n;
721   M->bs = 1;
722   M->max_block_size = 1;
723 
724   M->mnls          = (int*)malloc(sizeof(int)*size);
725   M->start_indices = (int*)malloc(sizeof(int)*size);
726   M->pe            = (int*)malloc(sizeof(int)*n);
727   M->block_sizes   = (int*)malloc(sizeof(int)*n);
728   for (i=0; i<n; i++) M->block_sizes[i] = 1;
729 
730   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
731 
732   M->start_indices[0] = 0;
733   for (i=1; i<size; i++) {
734     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
735   }
736 
737   M->mnl = M->mnls[M->myid];
738   M->my_start_index = M->start_indices[M->myid];
739 
740   for (i=0; i<size; i++) {
741     start_indx = M->start_indices[i];
742     for (j=0; j<M->mnls[i]; j++)
743       M->pe[start_indx+j] = i;
744   }
745 
746   if (AT) {
747     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
748   } else {
749     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
750   }
751 
752   rows = M->lines;
753 
754   /* Determine the mapping from global indices to pointers */
755   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
756   pe         = 0;
757   local_indx = 0;
758   for (i=0; i<M->n; i++) {
759     if (local_indx >= M->mnls[pe]) {
760       pe++;
761       local_indx = 0;
762     }
763     mapping[i] = local_indx + M->start_indices[pe];
764     local_indx++;
765   }
766 
767 
768   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
769 
770   /*********************************************************/
771   /************** Set up the row structure *****************/
772   /*********************************************************/
773 
774   /* count number of nonzeros in every row */
775   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
776   for (i=rstart; i<rend; i++) {
777     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
778     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
779   }
780 
781   /* allocate buffers */
782   len = 0;
783   for (i=0; i<mnl; i++) {
784     if (len < num_ptr[i]) len = num_ptr[i];
785   }
786 
787   for (i=rstart; i<rend; i++) {
788     row_indx             = i-rstart;
789     len                  = num_ptr[row_indx];
790     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
791     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
792   }
793 
794   /* copy the matrix */
795   for (i=rstart; i<rend; i++) {
796     row_indx = i - rstart;
797     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
798     for (j=0; j<nz; j++) {
799       col = cols[j];
800       len = rows->len[row_indx]++;
801       rows->ptrs[row_indx][len] = mapping[col];
802       rows->A[row_indx][len]    = vals[j];
803     }
804     rows->slen[row_indx] = rows->len[row_indx];
805     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
806   }
807 
808 
809   /************************************************************/
810   /************** Set up the column structure *****************/
811   /*********************************************************/
812 
813   if (AT) {
814 
815     /* count number of nonzeros in every column */
816     for (i=rstart; i<rend; i++) {
817       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
818       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
819     }
820 
821     /* allocate buffers */
822     len = 0;
823     for (i=0; i<mnl; i++) {
824       if (len < num_ptr[i]) len = num_ptr[i];
825     }
826 
827     for (i=rstart; i<rend; i++) {
828       row_indx = i-rstart;
829       len      = num_ptr[row_indx];
830       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
831     }
832 
833     /* copy the matrix (i.e., the structure) */
834     for (i=rstart; i<rend; i++) {
835       row_indx = i - rstart;
836       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
837       for (j=0; j<nz; j++) {
838 	col = cols[j];
839 	len = rows->rlen[row_indx]++;
840 	rows->rptrs[row_indx][len] = mapping[col];
841       }
842       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
843     }
844   }
845 
846   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
847   ierr = PetscFree(mapping);CHKERRQ(ierr);
848 
849   order_pointers(M);
850   M->maxnz = calc_maxnz(M);
851 
852   *B = M;
853 
854   PetscFunctionReturn(0);
855 }
856 
857 /**********************************************************************/
858 
859 /*
860    Converts from an SPAI matrix B  to a PETSc matrix PB.
861    This assumes that the the SPAI matrix B is stored in
862    COMPRESSED-ROW format.
863 */
864 #undef __FUNCT__
865 #define __FUNCT__ "ConvertMatrixToMat"
866 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
867 {
868   PetscMPIInt    size,rank;
869   PetscErrorCode ierr;
870   int            m,n,M,N;
871   int            d_nz,o_nz;
872   int            *d_nnz,*o_nnz;
873   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
874   PetscScalar    val;
875 
876   PetscFunctionBegin;
877   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
878   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
879 
880   m = n = B->mnls[rank];
881   d_nz = o_nz = 0;
882 
883   /* Determine preallocation for MatCreateMPIAIJ */
884   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
885   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
886   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
887   first_diag_col = B->start_indices[rank];
888   last_diag_col = first_diag_col + B->mnls[rank];
889   for (i=0; i<B->mnls[rank]; i++) {
890     for (k=0; k<B->lines->len[i]; k++) {
891       global_col = B->lines->ptrs[i][k];
892       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
893 	d_nnz[i]++;
894       else
895 	o_nnz[i]++;
896     }
897   }
898 
899   M = N = B->n;
900   /* Here we only know how to create AIJ format */
901   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
902   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
903   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
904   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
905   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
906 
907   for (i=0; i<B->mnls[rank]; i++) {
908     global_row = B->start_indices[rank]+i;
909     for (k=0; k<B->lines->len[i]; k++) {
910       global_col = B->lines->ptrs[i][k];
911       val = B->lines->A[i][k];
912       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
913     }
914   }
915 
916   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
917   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
918 
919   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
920   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 /**********************************************************************/
926 
927 /*
928    Converts from an SPAI vector v  to a PETSc vec Pv.
929 */
930 #undef __FUNCT__
931 #define __FUNCT__ "ConvertVectorToVec"
932 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
933 {
934   PetscErrorCode ierr;
935   PetscMPIInt    size,rank;
936   int            m,M,i,*mnls,*start_indices,*global_indices;
937 
938   PetscFunctionBegin;
939   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
940   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
941 
942   m = v->mnl;
943   M = v->n;
944 
945 
946   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
947 
948   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
949   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
950 
951   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
952   start_indices[0] = 0;
953   for (i=1; i<size; i++)
954     start_indices[i] = start_indices[i-1] +mnls[i-1];
955 
956   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
957   for (i=0; i<v->mnl; i++)
958     global_indices[i] = start_indices[rank] + i;
959 
960   ierr = PetscFree(mnls);CHKERRQ(ierr);
961   ierr = PetscFree(start_indices);CHKERRQ(ierr);
962 
963   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
964   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
965   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
966 
967   ierr = PetscFree(global_indices);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 
972 
973 
974