xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision fd705b320d8d44969be9ca25a36dbdd35fbe8e12)
1 #define PETSCKSP_DLL
2 
3 /*
4    3/99 Modified by Stephen Barnard to support SPAI version 3.0
5 */
6 
7 /*
8       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9    Code written by Stephen Barnard.
10 
11       Note: there is some BAD memory bleeding below!
12 
13       This code needs work
14 
15    1) get rid of all memory bleeding
16    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
17       rather than having the sp flag for PC_SPAI
18 
19 */
20 
21 #include "private/pcimpl.h"        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include "spai.h"
30 #include "matrix.h"
31 EXTERN_C_END
32 
33 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
35 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);
37 
38 typedef struct {
39 
40   matrix *B;              /* matrix in SPAI format */
41   matrix *BT;             /* transpose of matrix in SPAI format */
42   matrix *M;              /* the approximate inverse in SPAI format */
43 
44   Mat    PM;              /* the approximate inverse PETSc format */
45 
46   double epsilon;         /* tolerance */
47   int    nbsteps;         /* max number of "improvement" steps per line */
48   int    max;             /* max dimensions of is_I, q, etc. */
49   int    maxnew;          /* max number of new entries per step */
50   int    block_size;      /* constant block size */
51   int    cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
52   int    verbose;         /* SPAI prints timing and statistics */
53 
54   int    sp;              /* symmetric nonzero pattern */
55   MPI_Comm comm_spai;     /* communicator to be used with spai */
56 } PC_SPAI;
57 
58 /**********************************************************************/
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "PCSetUp_SPAI"
62 static PetscErrorCode PCSetUp_SPAI(PC pc)
63 {
64   PC_SPAI *ispai = (PC_SPAI*)pc->data;
65   PetscErrorCode ierr;
66   Mat      AT;
67 
68   PetscFunctionBegin;
69 
70   init_SPAI();
71 
72   if (ispai->sp) {
73     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
74   } else {
75     /* Use the transpose to get the column nonzero structure. */
76     ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr);
77     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
78     ierr = MatDestroy(AT);CHKERRQ(ierr);
79   }
80 
81   /* Destroy the transpose */
82   /* Don't know how to do it. PETSc developers? */
83 
84   /* construct SPAI preconditioner */
85   /* FILE *messages */     /* file for warning messages */
86   /* double epsilon */     /* tolerance */
87   /* int nbsteps */        /* max number of "improvement" steps per line */
88   /* int max */            /* max dimensions of is_I, q, etc. */
89   /* int maxnew */         /* max number of new entries per step */
90   /* int block_size */     /* block_size == 1 specifies scalar elments
91                               block_size == n specifies nxn constant-block elements
92                               block_size == 0 specifies variable-block elements */
93   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
94                            /* cache_size == 0 indicates no caching */
95   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
96                               verbose == 1 prints timing and matrix statistics */
97 
98   ierr = bspai(ispai->B,&ispai->M,
99 		   stdout,
100 		   ispai->epsilon,
101 		   ispai->nbsteps,
102 		   ispai->max,
103 		   ispai->maxnew,
104 		   ispai->block_size,
105 		   ispai->cache_size,
106 	       ispai->verbose); CHKERRQ(ierr);
107 
108   ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
109 
110   /* free the SPAI matrices */
111   sp_free_matrix(ispai->B);
112   sp_free_matrix(ispai->M);
113 
114   PetscFunctionReturn(0);
115 }
116 
117 /**********************************************************************/
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "PCApply_SPAI"
121 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
122 {
123   PC_SPAI *ispai = (PC_SPAI*)pc->data;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   /* Now using PETSc's multiply */
128   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 /**********************************************************************/
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "PCDestroy_SPAI"
136 static PetscErrorCode PCDestroy_SPAI(PC pc)
137 {
138   PetscErrorCode ierr;
139   PC_SPAI *ispai = (PC_SPAI*)pc->data;
140 
141   PetscFunctionBegin;
142   if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);}
143   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
144   ierr = PetscFree(ispai);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 /**********************************************************************/
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "PCView_SPAI"
152 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
153 {
154   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
155   PetscErrorCode ierr;
156   PetscTruth iascii;
157 
158   PetscFunctionBegin;
159   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
160   if (iascii) {
161     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
169     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
170 
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 EXTERN_C_BEGIN
176 #undef __FUNCT__
177 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
178 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
179 {
180   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
181   PetscFunctionBegin;
182   ispai->epsilon = epsilon1;
183   PetscFunctionReturn(0);
184 }
185 EXTERN_C_END
186 
187 /**********************************************************************/
188 
189 EXTERN_C_BEGIN
190 #undef __FUNCT__
191 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
192 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
193 {
194   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
195   PetscFunctionBegin;
196   ispai->nbsteps = nbsteps1;
197   PetscFunctionReturn(0);
198 }
199 EXTERN_C_END
200 
201 /**********************************************************************/
202 
203 /* added 1/7/99 g.h. */
204 EXTERN_C_BEGIN
205 #undef __FUNCT__
206 #define __FUNCT__ "PCSPAISetMax_SPAI"
207 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1)
208 {
209   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
210   PetscFunctionBegin;
211   ispai->max = max1;
212   PetscFunctionReturn(0);
213 }
214 EXTERN_C_END
215 
216 /**********************************************************************/
217 
218 EXTERN_C_BEGIN
219 #undef __FUNCT__
220 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
221 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
222 {
223   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
224   PetscFunctionBegin;
225   ispai->maxnew = maxnew1;
226   PetscFunctionReturn(0);
227 }
228 EXTERN_C_END
229 
230 /**********************************************************************/
231 
232 EXTERN_C_BEGIN
233 #undef __FUNCT__
234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
235 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
236 {
237   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
238   PetscFunctionBegin;
239   ispai->block_size = block_size1;
240   PetscFunctionReturn(0);
241 }
242 EXTERN_C_END
243 
244 /**********************************************************************/
245 
246 EXTERN_C_BEGIN
247 #undef __FUNCT__
248 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
249 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
250 {
251   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
252   PetscFunctionBegin;
253   ispai->cache_size = cache_size;
254   PetscFunctionReturn(0);
255 }
256 EXTERN_C_END
257 
258 /**********************************************************************/
259 
260 EXTERN_C_BEGIN
261 #undef __FUNCT__
262 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
263 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose)
264 {
265   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
266   PetscFunctionBegin;
267   ispai->verbose = verbose;
268   PetscFunctionReturn(0);
269 }
270 EXTERN_C_END
271 
272 /**********************************************************************/
273 
274 EXTERN_C_BEGIN
275 #undef __FUNCT__
276 #define __FUNCT__ "PCSPAISetSp_SPAI"
277 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp)
278 {
279   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
280   PetscFunctionBegin;
281   ispai->sp = sp;
282   PetscFunctionReturn(0);
283 }
284 EXTERN_C_END
285 
286 /* -------------------------------------------------------------------*/
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "PCSPAISetEpsilon"
290 /*@
291   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
292 
293   Input Parameters:
294 + pc - the preconditioner
295 - eps - epsilon (default .4)
296 
297   Notes:  Espilon must be between 0 and 1. It controls the
298                  quality of the approximation of M to the inverse of
299                  A. Higher values of epsilon lead to more work, more
300                  fill, and usually better preconditioners. In many
301                  cases the best choice of epsilon is the one that
302                  divides the total solution time equally between the
303                  preconditioner and the solver.
304 
305   Level: intermediate
306 
307 .seealso: PCSPAI, PCSetType()
308   @*/
309 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1)
310 {
311   PetscErrorCode ierr,(*f)(PC,double);
312   PetscFunctionBegin;
313   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr);
314   if (f) {
315     ierr = (*f)(pc,epsilon1);CHKERRQ(ierr);
316   }
317   PetscFunctionReturn(0);
318 }
319 
320 /**********************************************************************/
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "PCSPAISetNBSteps"
324 /*@
325   PCSPAISetNBSteps - set maximum number of improvement steps per row in
326         the SPAI preconditioner
327 
328   Input Parameters:
329 + pc - the preconditioner
330 - n - number of steps (default 5)
331 
332   Notes:  SPAI constructs to approximation to every column of
333                  the exact inverse of A in a series of improvement
334                  steps. The quality of the approximation is determined
335                  by epsilon. If an approximation achieving an accuracy
336                  of epsilon is not obtained after ns steps, SPAI simply
337                  uses the best approximation constructed so far.
338 
339   Level: intermediate
340 
341 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
342 @*/
343 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1)
344 {
345   PetscErrorCode ierr,(*f)(PC,int);
346   PetscFunctionBegin;
347   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr);
348   if (f) {
349     ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr);
350   }
351   PetscFunctionReturn(0);
352 }
353 
354 /**********************************************************************/
355 
356 /* added 1/7/99 g.h. */
357 #undef __FUNCT__
358 #define __FUNCT__ "PCSPAISetMax"
359 /*@
360   PCSPAISetMax - set the size of various working buffers in
361         the SPAI preconditioner
362 
363   Input Parameters:
364 + pc - the preconditioner
365 - n - size (default is 5000)
366 
367   Level: intermediate
368 
369 .seealso: PCSPAI, PCSetType()
370 @*/
371 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1)
372 {
373   PetscErrorCode ierr,(*f)(PC,int);
374   PetscFunctionBegin;
375   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr);
376   if (f) {
377     ierr = (*f)(pc,max1);CHKERRQ(ierr);
378   }
379   PetscFunctionReturn(0);
380 }
381 
382 /**********************************************************************/
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "PCSPAISetMaxNew"
386 /*@
387   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
388    in SPAI preconditioner
389 
390   Input Parameters:
391 + pc - the preconditioner
392 - n - maximum number (default 5)
393 
394   Level: intermediate
395 
396 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
397 @*/
398 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1)
399 {
400   PetscErrorCode ierr,(*f)(PC,int);
401   PetscFunctionBegin;
402   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr);
403   if (f) {
404     ierr = (*f)(pc,maxnew1);CHKERRQ(ierr);
405   }
406   PetscFunctionReturn(0);
407 }
408 
409 /**********************************************************************/
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "PCSPAISetBlockSize"
413 /*@
414   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
415 
416   Input Parameters:
417 + pc - the preconditioner
418 - n - block size (default 1)
419 
420   Notes: A block
421                  size of 1 treats A as a matrix of scalar elements. A
422                  block size of s > 1 treats A as a matrix of sxs
423                  blocks. A block size of 0 treats A as a matrix with
424                  variable sized blocks, which are determined by
425                  searching for dense square diagonal blocks in A.
426                  This can be very effective for finite-element
427                  matrices.
428 
429                  SPAI will convert A to block form, use a block
430                  version of the preconditioner algorithm, and then
431                  convert the result back to scalar form.
432 
433                  In many cases the a block-size parameter other than 1
434                  can lead to very significant improvement in
435                  performance.
436 
437 
438   Level: intermediate
439 
440 .seealso: PCSPAI, PCSetType()
441 @*/
442 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1)
443 {
444   PetscErrorCode ierr,(*f)(PC,int);
445   PetscFunctionBegin;
446   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr);
447   if (f) {
448     ierr = (*f)(pc,block_size1);CHKERRQ(ierr);
449   }
450   PetscFunctionReturn(0);
451 }
452 
453 /**********************************************************************/
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "PCSPAISetCacheSize"
457 /*@
458   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
459 
460   Input Parameters:
461 + pc - the preconditioner
462 - n -  cache size {0,1,2,3,4,5} (default 5)
463 
464   Notes:    SPAI uses a hash table to cache messages and avoid
465                  redundant communication. If suggest always using
466                  5. This parameter is irrelevant in the serial
467                  version.
468 
469   Level: intermediate
470 
471 .seealso: PCSPAI, PCSetType()
472 @*/
473 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size)
474 {
475   PetscErrorCode ierr,(*f)(PC,int);
476   PetscFunctionBegin;
477   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr);
478   if (f) {
479     ierr = (*f)(pc,cache_size);CHKERRQ(ierr);
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 /**********************************************************************/
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "PCSPAISetVerbose"
488 /*@
489   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
490 
491   Input Parameters:
492 + pc - the preconditioner
493 - n - level (default 1)
494 
495   Notes: print parameters, timings and matrix statistics
496 
497   Level: intermediate
498 
499 .seealso: PCSPAI, PCSetType()
500 @*/
501 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose)
502 {
503   PetscErrorCode ierr,(*f)(PC,int);
504   PetscFunctionBegin;
505   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr);
506   if (f) {
507     ierr = (*f)(pc,verbose);CHKERRQ(ierr);
508   }
509   PetscFunctionReturn(0);
510 }
511 
512 /**********************************************************************/
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "PCSPAISetSp"
516 /*@
517   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
518 
519   Input Parameters:
520 + pc - the preconditioner
521 - n - 0 or 1
522 
523   Notes: If A has a symmetric nonzero pattern use -sp 1 to
524                  improve performance by eliminating some communication
525                  in the parallel version. Even if A does not have a
526                  symmetric nonzero pattern -sp 1 may well lead to good
527                  results, but the code will not follow the published
528                  SPAI algorithm exactly.
529 
530 
531   Level: intermediate
532 
533 .seealso: PCSPAI, PCSetType()
534 @*/
535 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp)
536 {
537   PetscErrorCode ierr,(*f)(PC,int);
538   PetscFunctionBegin;
539   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr);
540   if (f) {
541     ierr = (*f)(pc,sp);CHKERRQ(ierr);
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 /**********************************************************************/
547 
548 /**********************************************************************/
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "PCSetFromOptions_SPAI"
552 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
553 {
554   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
555   PetscErrorCode ierr;
556   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
557   double         epsilon1;
558   PetscTruth     flg;
559 
560   PetscFunctionBegin;
561   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
562     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
563     if (flg) {
564       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
565     }
566     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
567     if (flg) {
568       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
569     }
570     /* added 1/7/99 g.h. */
571     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
572     if (flg) {
573       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
574     }
575     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
576     if (flg) {
577       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
578     }
579     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
580     if (flg) {
581       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
582     }
583     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
584     if (flg) {
585       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
586     }
587     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
588     if (flg) {
589       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
590     }
591     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
592     if (flg) {
593       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
594     }
595   ierr = PetscOptionsTail();CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 /**********************************************************************/
600 
601 /*MC
602    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
603      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
604 
605    Options Database Keys:
606 +  -pc_spai_set_epsilon <eps> - set tolerance
607 .  -pc_spai_nbstep <n> - set nbsteps
608 .  -pc_spai_max <m> - set max
609 .  -pc_spai_max_new <m> - set maxnew
610 .  -pc_spai_block_size <n> - set block size
611 .  -pc_spai_cache_size <n> - set cache size
612 .  -pc_spai_sp <m> - set sp
613 -  -pc_spai_set_verbose <true,false> - verbose output
614 
615    Notes: This only works with AIJ matrices.
616 
617    Level: beginner
618 
619    Concepts: approximate inverse
620 
621 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
622     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
623     PCSPAISetVerbose(), PCSPAISetSp()
624 M*/
625 
626 EXTERN_C_BEGIN
627 #undef __FUNCT__
628 #define __FUNCT__ "PCCreate_SPAI"
629 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc)
630 {
631   PC_SPAI *ispai;
632   PetscErrorCode ierr;
633 
634   PetscFunctionBegin;
635   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
636   pc->data           = (void*)ispai;
637 
638   pc->ops->destroy         = PCDestroy_SPAI;
639   pc->ops->apply           = PCApply_SPAI;
640   pc->ops->applyrichardson = 0;
641   pc->ops->setup           = PCSetUp_SPAI;
642   pc->ops->view            = PCView_SPAI;
643   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
644 
645   ispai->epsilon    = .4;
646   ispai->nbsteps    = 5;
647   ispai->max        = 5000;
648   ispai->maxnew     = 5;
649   ispai->block_size = 1;
650   ispai->cache_size = 5;
651   ispai->verbose    = 0;
652 
653   ispai->sp         = 1;
654   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
655 
656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
657                     "PCSPAISetEpsilon_SPAI",
658                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
660                     "PCSPAISetNBSteps_SPAI",
661                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
662   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
663                     "PCSPAISetMax_SPAI",
664                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
665   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
666                     "PCSPAISetMaxNew_SPAI",
667                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
669                     "PCSPAISetBlockSize_SPAI",
670                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
671   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
672                     "PCSPAISetCacheSize_SPAI",
673                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
675                     "PCSPAISetVerbose_SPAI",
676                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
677   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
678                     "PCSPAISetSp_SPAI",
679                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
680 
681   PetscFunctionReturn(0);
682 }
683 EXTERN_C_END
684 
685 /**********************************************************************/
686 
687 /*
688    Converts from a PETSc matrix to an SPAI matrix
689 */
690 #undef __FUNCT__
691 #define __FUNCT__ "ConvertMatToMatrix"
692 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
693 {
694   matrix                  *M;
695   int                     i,j,col;
696   int                     row_indx;
697   int                     len,pe,local_indx,start_indx;
698   int                     *mapping;
699   PetscErrorCode ierr;
700   const int               *cols;
701   const double            *vals;
702   int                     *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend;
703   struct compressed_lines *rows;
704 
705   PetscFunctionBegin;
706 
707   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
709   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
710   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
711 
712   /*
713     not sure why a barrier is required. commenting out
714   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
715   */
716 
717   M = new_matrix((SPAI_Comm)comm);
718 
719   M->n = n;
720   M->bs = 1;
721   M->max_block_size = 1;
722 
723   M->mnls          = (int*)malloc(sizeof(int)*size);
724   M->start_indices = (int*)malloc(sizeof(int)*size);
725   M->pe            = (int*)malloc(sizeof(int)*n);
726   M->block_sizes   = (int*)malloc(sizeof(int)*n);
727   for (i=0; i<n; i++) M->block_sizes[i] = 1;
728 
729   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
730 
731   M->start_indices[0] = 0;
732   for (i=1; i<size; i++) {
733     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
734   }
735 
736   M->mnl = M->mnls[M->myid];
737   M->my_start_index = M->start_indices[M->myid];
738 
739   for (i=0; i<size; i++) {
740     start_indx = M->start_indices[i];
741     for (j=0; j<M->mnls[i]; j++)
742       M->pe[start_indx+j] = i;
743   }
744 
745   if (AT) {
746     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
747   } else {
748     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
749   }
750 
751   rows     = M->lines;
752 
753   /* Determine the mapping from global indices to pointers */
754   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
755   pe         = 0;
756   local_indx = 0;
757   for (i=0; i<M->n; i++) {
758     if (local_indx >= M->mnls[pe]) {
759       pe++;
760       local_indx = 0;
761     }
762     mapping[i] = local_indx + M->start_indices[pe];
763     local_indx++;
764   }
765 
766 
767   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
768 
769   /*********************************************************/
770   /************** Set up the row structure *****************/
771   /*********************************************************/
772 
773   /* count number of nonzeros in every row */
774   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
775   for (i=rstart; i<rend; i++) {
776     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
777     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
778   }
779 
780   /* allocate buffers */
781   len = 0;
782   for (i=0; i<mnl; i++) {
783     if (len < num_ptr[i]) len = num_ptr[i];
784   }
785 
786   for (i=rstart; i<rend; i++) {
787     row_indx             = i-rstart;
788     len                  = num_ptr[row_indx];
789     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
790     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
791   }
792 
793   /* copy the matrix */
794   for (i=rstart; i<rend; i++) {
795     row_indx = i - rstart;
796     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
797     for (j=0; j<nz; j++) {
798       col = cols[j];
799       len = rows->len[row_indx]++;
800       rows->ptrs[row_indx][len] = mapping[col];
801       rows->A[row_indx][len]    = vals[j];
802     }
803     rows->slen[row_indx] = rows->len[row_indx];
804     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
805   }
806 
807 
808   /************************************************************/
809   /************** Set up the column structure *****************/
810   /*********************************************************/
811 
812   if (AT) {
813 
814     /* count number of nonzeros in every column */
815     for (i=rstart; i<rend; i++) {
816       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
817       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
818     }
819 
820     /* allocate buffers */
821     len = 0;
822     for (i=0; i<mnl; i++) {
823       if (len < num_ptr[i]) len = num_ptr[i];
824     }
825 
826     for (i=rstart; i<rend; i++) {
827       row_indx = i-rstart;
828       len      = num_ptr[row_indx];
829       rows->rptrs[row_indx]     = (int*)malloc(len*sizeof(int));
830     }
831 
832     /* copy the matrix (i.e., the structure) */
833     for (i=rstart; i<rend; i++) {
834       row_indx = i - rstart;
835       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
836       for (j=0; j<nz; j++) {
837 	col = cols[j];
838 	len = rows->rlen[row_indx]++;
839 	rows->rptrs[row_indx][len] = mapping[col];
840       }
841       ierr     = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
842     }
843   }
844 
845   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
846   ierr = PetscFree(mapping);CHKERRQ(ierr);
847 
848   order_pointers(M);
849   M->maxnz = calc_maxnz(M);
850 
851   *B = M;
852 
853   PetscFunctionReturn(0);
854 }
855 
856 /**********************************************************************/
857 
858 /*
859    Converts from an SPAI matrix B  to a PETSc matrix PB.
860    This assumes that the the SPAI matrix B is stored in
861    COMPRESSED-ROW format.
862 */
863 #undef __FUNCT__
864 #define __FUNCT__ "ConvertMatrixToMat"
865 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
866 {
867   int         size,rank;
868   PetscErrorCode ierr;
869   int         m,n,M,N;
870   int         d_nz,o_nz;
871   int         *d_nnz,*o_nnz;
872   int         i,k,global_row,global_col,first_diag_col,last_diag_col;
873   PetscScalar val;
874 
875   PetscFunctionBegin;
876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
877   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
878 
879   m = n = B->mnls[rank];
880   d_nz = o_nz = 0;
881 
882   /* Determine preallocation for MatCreateMPIAIJ */
883   ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr);
884   ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr);
885   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
886   first_diag_col = B->start_indices[rank];
887   last_diag_col = first_diag_col + B->mnls[rank];
888   for (i=0; i<B->mnls[rank]; i++) {
889     for (k=0; k<B->lines->len[i]; k++) {
890       global_col = B->lines->ptrs[i][k];
891       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
892 	d_nnz[i]++;
893       else
894 	o_nnz[i]++;
895     }
896   }
897 
898   M = N = B->n;
899   /* Here we only know how to create AIJ format */
900   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
901   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
902   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
903   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
904   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
905 
906   for (i=0; i<B->mnls[rank]; i++) {
907     global_row = B->start_indices[rank]+i;
908     for (k=0; k<B->lines->len[i]; k++) {
909       global_col = B->lines->ptrs[i][k];
910       val = B->lines->A[i][k];
911       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
912     }
913   }
914 
915   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
916   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
917 
918   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
919   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
920 
921   PetscFunctionReturn(0);
922 }
923 
924 /**********************************************************************/
925 
926 /*
927    Converts from an SPAI vector v  to a PETSc vec Pv.
928 */
929 #undef __FUNCT__
930 #define __FUNCT__ "ConvertVectorToVec"
931 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
932 {
933   PetscErrorCode ierr;
934   int size,rank,m,M,i,*mnls,*start_indices,*global_indices;
935 
936   PetscFunctionBegin;
937   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
938   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
939 
940   m = v->mnl;
941   M = v->n;
942 
943 
944   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
945 
946   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
947   ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr);
948 
949   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
950   start_indices[0] = 0;
951   for (i=1; i<size; i++)
952     start_indices[i] = start_indices[i-1] +mnls[i-1];
953 
954   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
955   for (i=0; i<v->mnl; i++)
956     global_indices[i] = start_indices[rank] + i;
957 
958   ierr = PetscFree(mnls);CHKERRQ(ierr);
959   ierr = PetscFree(start_indices);CHKERRQ(ierr);
960 
961   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
962 
963   ierr = PetscFree(global_indices);CHKERRQ(ierr);
964 
965   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
966   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
967 
968   PetscFunctionReturn(0);
969 }
970 
971 
972 
973 
974