xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision c8cd8b03a8e69285ecfea39db41c0ff684948fa5)
1 #define PETSCKSP_DLL
2 
3 /*
4    3/99 Modified by Stephen Barnard to support SPAI version 3.0
5 */
6 
7 /*
8       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9    Code written by Stephen Barnard.
10 
11       Note: there is some BAD memory bleeding below!
12 
13       This code needs work
14 
15    1) get rid of all memory bleeding
16    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
17       rather than having the sp flag for PC_SPAI
18 
19 */
20 
21 #include "private/pcimpl.h"        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #include "spai.h"
29 #include "matrix.h"
30 EXTERN_C_END
31 
32 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
33 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
34 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
35 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);
36 
37 typedef struct {
38 
39   matrix *B;              /* matrix in SPAI format */
40   matrix *BT;             /* transpose of matrix in SPAI format */
41   matrix *M;              /* the approximate inverse in SPAI format */
42 
43   Mat    PM;              /* the approximate inverse PETSc format */
44 
45   double epsilon;         /* tolerance */
46   int    nbsteps;         /* max number of "improvement" steps per line */
47   int    max;             /* max dimensions of is_I, q, etc. */
48   int    maxnew;          /* max number of new entries per step */
49   int    block_size;      /* constant block size */
50   int    cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
51   int    verbose;         /* SPAI prints timing and statistics */
52 
53   int    sp;              /* symmetric nonzero pattern */
54   MPI_Comm comm_spai;     /* communicator to be used with spai */
55 } PC_SPAI;
56 
57 /**********************************************************************/
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "PCSetUp_SPAI"
61 static PetscErrorCode PCSetUp_SPAI(PC pc)
62 {
63   PC_SPAI *ispai = (PC_SPAI*)pc->data;
64   PetscErrorCode ierr;
65   Mat      AT;
66 
67   PetscFunctionBegin;
68 
69   init_SPAI();
70 
71   if (ispai->sp) {
72     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
73   } else {
74     /* Use the transpose to get the column nonzero structure. */
75     ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr);
76     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
77     ierr = MatDestroy(AT);CHKERRQ(ierr);
78   }
79 
80   /* Destroy the transpose */
81   /* Don't know how to do it. PETSc developers? */
82 
83   /* construct SPAI preconditioner */
84   /* FILE *messages */     /* file for warning messages */
85   /* double epsilon */     /* tolerance */
86   /* int nbsteps */        /* max number of "improvement" steps per line */
87   /* int max */            /* max dimensions of is_I, q, etc. */
88   /* int maxnew */         /* max number of new entries per step */
89   /* int block_size */     /* block_size == 1 specifies scalar elments
90                               block_size == n specifies nxn constant-block elements
91                               block_size == 0 specifies variable-block elements */
92   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
93                            /* cache_size == 0 indicates no caching */
94   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
95                               verbose == 1 prints timing and matrix statistics */
96 
97   ierr = bspai(ispai->B,&ispai->M,
98 		   stdout,
99 		   ispai->epsilon,
100 		   ispai->nbsteps,
101 		   ispai->max,
102 		   ispai->maxnew,
103 		   ispai->block_size,
104 		   ispai->cache_size,
105 	       ispai->verbose); CHKERRQ(ierr);
106 
107   ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
108 
109   /* free the SPAI matrices */
110   sp_free_matrix(ispai->B);
111   sp_free_matrix(ispai->M);
112 
113   PetscFunctionReturn(0);
114 }
115 
116 /**********************************************************************/
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "PCApply_SPAI"
120 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
121 {
122   PC_SPAI *ispai = (PC_SPAI*)pc->data;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   /* Now using PETSc's multiply */
127   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /**********************************************************************/
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "PCDestroy_SPAI"
135 static PetscErrorCode PCDestroy_SPAI(PC pc)
136 {
137   PetscErrorCode ierr;
138   PC_SPAI *ispai = (PC_SPAI*)pc->data;
139 
140   PetscFunctionBegin;
141   if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);}
142   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
143   ierr = PetscFree(ispai);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 /**********************************************************************/
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "PCView_SPAI"
151 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
152 {
153   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
154   PetscErrorCode ierr;
155   PetscTruth iascii;
156 
157   PetscFunctionBegin;
158   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
159   if (iascii) {
160     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
169 
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 EXTERN_C_BEGIN
175 #undef __FUNCT__
176 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
177 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
178 {
179   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
180   PetscFunctionBegin;
181   ispai->epsilon = epsilon1;
182   PetscFunctionReturn(0);
183 }
184 EXTERN_C_END
185 
186 /**********************************************************************/
187 
188 EXTERN_C_BEGIN
189 #undef __FUNCT__
190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
191 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
192 {
193   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
194   PetscFunctionBegin;
195   ispai->nbsteps = nbsteps1;
196   PetscFunctionReturn(0);
197 }
198 EXTERN_C_END
199 
200 /**********************************************************************/
201 
202 /* added 1/7/99 g.h. */
203 EXTERN_C_BEGIN
204 #undef __FUNCT__
205 #define __FUNCT__ "PCSPAISetMax_SPAI"
206 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1)
207 {
208   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
209   PetscFunctionBegin;
210   ispai->max = max1;
211   PetscFunctionReturn(0);
212 }
213 EXTERN_C_END
214 
215 /**********************************************************************/
216 
217 EXTERN_C_BEGIN
218 #undef __FUNCT__
219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
220 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221 {
222   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
223   PetscFunctionBegin;
224   ispai->maxnew = maxnew1;
225   PetscFunctionReturn(0);
226 }
227 EXTERN_C_END
228 
229 /**********************************************************************/
230 
231 EXTERN_C_BEGIN
232 #undef __FUNCT__
233 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
234 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
235 {
236   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
237   PetscFunctionBegin;
238   ispai->block_size = block_size1;
239   PetscFunctionReturn(0);
240 }
241 EXTERN_C_END
242 
243 /**********************************************************************/
244 
245 EXTERN_C_BEGIN
246 #undef __FUNCT__
247 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
248 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
249 {
250   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
251   PetscFunctionBegin;
252   ispai->cache_size = cache_size;
253   PetscFunctionReturn(0);
254 }
255 EXTERN_C_END
256 
257 /**********************************************************************/
258 
259 EXTERN_C_BEGIN
260 #undef __FUNCT__
261 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
262 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose)
263 {
264   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
265   PetscFunctionBegin;
266   ispai->verbose = verbose;
267   PetscFunctionReturn(0);
268 }
269 EXTERN_C_END
270 
271 /**********************************************************************/
272 
273 EXTERN_C_BEGIN
274 #undef __FUNCT__
275 #define __FUNCT__ "PCSPAISetSp_SPAI"
276 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp)
277 {
278   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
279   PetscFunctionBegin;
280   ispai->sp = sp;
281   PetscFunctionReturn(0);
282 }
283 EXTERN_C_END
284 
285 /* -------------------------------------------------------------------*/
286 
287 #undef __FUNCT__
288 #define __FUNCT__ "PCSPAISetEpsilon"
289 /*@
290   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
291 
292   Input Parameters:
293 + pc - the preconditioner
294 - eps - epsilon (default .4)
295 
296   Notes:  Espilon must be between 0 and 1. It controls the
297                  quality of the approximation of M to the inverse of
298                  A. Higher values of epsilon lead to more work, more
299                  fill, and usually better preconditioners. In many
300                  cases the best choice of epsilon is the one that
301                  divides the total solution time equally between the
302                  preconditioner and the solver.
303 
304   Level: intermediate
305 
306 .seealso: PCSPAI, PCSetType()
307   @*/
308 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1)
309 {
310   PetscErrorCode ierr,(*f)(PC,double);
311   PetscFunctionBegin;
312   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr);
313   if (f) {
314     ierr = (*f)(pc,epsilon1);CHKERRQ(ierr);
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 /**********************************************************************/
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "PCSPAISetNBSteps"
323 /*@
324   PCSPAISetNBSteps - set maximum number of improvement steps per row in
325         the SPAI preconditioner
326 
327   Input Parameters:
328 + pc - the preconditioner
329 - n - number of steps (default 5)
330 
331   Notes:  SPAI constructs to approximation to every column of
332                  the exact inverse of A in a series of improvement
333                  steps. The quality of the approximation is determined
334                  by epsilon. If an approximation achieving an accuracy
335                  of epsilon is not obtained after ns steps, SPAI simply
336                  uses the best approximation constructed so far.
337 
338   Level: intermediate
339 
340 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
341 @*/
342 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1)
343 {
344   PetscErrorCode ierr,(*f)(PC,int);
345   PetscFunctionBegin;
346   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr);
347   if (f) {
348     ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr);
349   }
350   PetscFunctionReturn(0);
351 }
352 
353 /**********************************************************************/
354 
355 /* added 1/7/99 g.h. */
356 #undef __FUNCT__
357 #define __FUNCT__ "PCSPAISetMax"
358 /*@
359   PCSPAISetMax - set the size of various working buffers in
360         the SPAI preconditioner
361 
362   Input Parameters:
363 + pc - the preconditioner
364 - n - size (default is 5000)
365 
366   Level: intermediate
367 
368 .seealso: PCSPAI, PCSetType()
369 @*/
370 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1)
371 {
372   PetscErrorCode ierr,(*f)(PC,int);
373   PetscFunctionBegin;
374   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr);
375   if (f) {
376     ierr = (*f)(pc,max1);CHKERRQ(ierr);
377   }
378   PetscFunctionReturn(0);
379 }
380 
381 /**********************************************************************/
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "PCSPAISetMaxNew"
385 /*@
386   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
387    in SPAI preconditioner
388 
389   Input Parameters:
390 + pc - the preconditioner
391 - n - maximum number (default 5)
392 
393   Level: intermediate
394 
395 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
396 @*/
397 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1)
398 {
399   PetscErrorCode ierr,(*f)(PC,int);
400   PetscFunctionBegin;
401   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr);
402   if (f) {
403     ierr = (*f)(pc,maxnew1);CHKERRQ(ierr);
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 /**********************************************************************/
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCSPAISetBlockSize"
412 /*@
413   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
414 
415   Input Parameters:
416 + pc - the preconditioner
417 - n - block size (default 1)
418 
419   Notes: A block
420                  size of 1 treats A as a matrix of scalar elements. A
421                  block size of s > 1 treats A as a matrix of sxs
422                  blocks. A block size of 0 treats A as a matrix with
423                  variable sized blocks, which are determined by
424                  searching for dense square diagonal blocks in A.
425                  This can be very effective for finite-element
426                  matrices.
427 
428                  SPAI will convert A to block form, use a block
429                  version of the preconditioner algorithm, and then
430                  convert the result back to scalar form.
431 
432                  In many cases the a block-size parameter other than 1
433                  can lead to very significant improvement in
434                  performance.
435 
436 
437   Level: intermediate
438 
439 .seealso: PCSPAI, PCSetType()
440 @*/
441 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1)
442 {
443   PetscErrorCode ierr,(*f)(PC,int);
444   PetscFunctionBegin;
445   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr);
446   if (f) {
447     ierr = (*f)(pc,block_size1);CHKERRQ(ierr);
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 /**********************************************************************/
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "PCSPAISetCacheSize"
456 /*@
457   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
458 
459   Input Parameters:
460 + pc - the preconditioner
461 - n -  cache size {0,1,2,3,4,5} (default 5)
462 
463   Notes:    SPAI uses a hash table to cache messages and avoid
464                  redundant communication. If suggest always using
465                  5. This parameter is irrelevant in the serial
466                  version.
467 
468   Level: intermediate
469 
470 .seealso: PCSPAI, PCSetType()
471 @*/
472 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size)
473 {
474   PetscErrorCode ierr,(*f)(PC,int);
475   PetscFunctionBegin;
476   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr);
477   if (f) {
478     ierr = (*f)(pc,cache_size);CHKERRQ(ierr);
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 /**********************************************************************/
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCSPAISetVerbose"
487 /*@
488   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
489 
490   Input Parameters:
491 + pc - the preconditioner
492 - n - level (default 1)
493 
494   Notes: print parameters, timings and matrix statistics
495 
496   Level: intermediate
497 
498 .seealso: PCSPAI, PCSetType()
499 @*/
500 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose)
501 {
502   PetscErrorCode ierr,(*f)(PC,int);
503   PetscFunctionBegin;
504   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr);
505   if (f) {
506     ierr = (*f)(pc,verbose);CHKERRQ(ierr);
507   }
508   PetscFunctionReturn(0);
509 }
510 
511 /**********************************************************************/
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "PCSPAISetSp"
515 /*@
516   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
517 
518   Input Parameters:
519 + pc - the preconditioner
520 - n - 0 or 1
521 
522   Notes: If A has a symmetric nonzero pattern use -sp 1 to
523                  improve performance by eliminating some communication
524                  in the parallel version. Even if A does not have a
525                  symmetric nonzero pattern -sp 1 may well lead to good
526                  results, but the code will not follow the published
527                  SPAI algorithm exactly.
528 
529 
530   Level: intermediate
531 
532 .seealso: PCSPAI, PCSetType()
533 @*/
534 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp)
535 {
536   PetscErrorCode ierr,(*f)(PC,int);
537   PetscFunctionBegin;
538   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr);
539   if (f) {
540     ierr = (*f)(pc,sp);CHKERRQ(ierr);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 /**********************************************************************/
546 
547 /**********************************************************************/
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "PCSetFromOptions_SPAI"
551 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
552 {
553   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
554   PetscErrorCode ierr;
555   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
556   double         epsilon1;
557   PetscTruth     flg;
558 
559   PetscFunctionBegin;
560   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
561     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
562     if (flg) {
563       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
564     }
565     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
566     if (flg) {
567       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
568     }
569     /* added 1/7/99 g.h. */
570     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
571     if (flg) {
572       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
573     }
574     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
575     if (flg) {
576       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
577     }
578     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
579     if (flg) {
580       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
581     }
582     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
583     if (flg) {
584       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
585     }
586     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
587     if (flg) {
588       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
589     }
590     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
591     if (flg) {
592       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
593     }
594   ierr = PetscOptionsTail();CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 /**********************************************************************/
599 
600 /*MC
601    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
602      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
603 
604    Options Database Keys:
605 +  -pc_spai_set_epsilon <eps> - set tolerance
606 .  -pc_spai_nbstep <n> - set nbsteps
607 .  -pc_spai_max <m> - set max
608 .  -pc_spai_max_new <m> - set maxnew
609 .  -pc_spai_block_size <n> - set block size
610 .  -pc_spai_cache_size <n> - set cache size
611 .  -pc_spai_sp <m> - set sp
612 -  -pc_spai_set_verbose <true,false> - verbose output
613 
614    Notes: This only works with AIJ matrices.
615 
616    Level: beginner
617 
618    Concepts: approximate inverse
619 
620 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
621     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
622     PCSPAISetVerbose(), PCSPAISetSp()
623 M*/
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "PCCreate_SPAI"
628 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc)
629 {
630   PC_SPAI *ispai;
631   PetscErrorCode ierr;
632 
633   PetscFunctionBegin;
634   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
635   pc->data           = (void*)ispai;
636 
637   pc->ops->destroy         = PCDestroy_SPAI;
638   pc->ops->apply           = PCApply_SPAI;
639   pc->ops->applyrichardson = 0;
640   pc->ops->setup           = PCSetUp_SPAI;
641   pc->ops->view            = PCView_SPAI;
642   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
643 
644   ispai->epsilon    = .4;
645   ispai->nbsteps    = 5;
646   ispai->max        = 5000;
647   ispai->maxnew     = 5;
648   ispai->block_size = 1;
649   ispai->cache_size = 5;
650   ispai->verbose    = 0;
651 
652   ispai->sp         = 1;
653   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
654 
655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
656                     "PCSPAISetEpsilon_SPAI",
657                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
659                     "PCSPAISetNBSteps_SPAI",
660                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
661   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
662                     "PCSPAISetMax_SPAI",
663                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
664   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
665                     "PCSPAISetMaxNew_SPAI",
666                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
668                     "PCSPAISetBlockSize_SPAI",
669                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
671                     "PCSPAISetCacheSize_SPAI",
672                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
674                     "PCSPAISetVerbose_SPAI",
675                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
677                     "PCSPAISetSp_SPAI",
678                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
679 
680   PetscFunctionReturn(0);
681 }
682 EXTERN_C_END
683 
684 /**********************************************************************/
685 
686 /*
687    Converts from a PETSc matrix to an SPAI matrix
688 */
689 #undef __FUNCT__
690 #define __FUNCT__ "ConvertMatToMatrix"
691 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
692 {
693   matrix                  *M;
694   int                     i,j,col;
695   int                     row_indx;
696   int                     len,pe,local_indx,start_indx;
697   int                     *mapping;
698   PetscErrorCode ierr;
699   const int               *cols;
700   const double            *vals;
701   int                     *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend;
702   struct compressed_lines *rows;
703 
704   PetscFunctionBegin;
705 
706   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
707   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
708   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
709   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
710 
711   /*
712     not sure why a barrier is required. commenting out
713   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
714   */
715 
716   M = new_matrix((void*)comm);
717 
718   M->n = n;
719   M->bs = 1;
720   M->max_block_size = 1;
721 
722   M->mnls          = (int*)malloc(sizeof(int)*size);
723   M->start_indices = (int*)malloc(sizeof(int)*size);
724   M->pe            = (int*)malloc(sizeof(int)*n);
725   M->block_sizes   = (int*)malloc(sizeof(int)*n);
726   for (i=0; i<n; i++) M->block_sizes[i] = 1;
727 
728   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
729 
730   M->start_indices[0] = 0;
731   for (i=1; i<size; i++) {
732     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
733   }
734 
735   M->mnl = M->mnls[M->myid];
736   M->my_start_index = M->start_indices[M->myid];
737 
738   for (i=0; i<size; i++) {
739     start_indx = M->start_indices[i];
740     for (j=0; j<M->mnls[i]; j++)
741       M->pe[start_indx+j] = i;
742   }
743 
744   if (AT) {
745     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
746   } else {
747     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
748   }
749 
750   rows     = M->lines;
751 
752   /* Determine the mapping from global indices to pointers */
753   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
754   pe         = 0;
755   local_indx = 0;
756   for (i=0; i<M->n; i++) {
757     if (local_indx >= M->mnls[pe]) {
758       pe++;
759       local_indx = 0;
760     }
761     mapping[i] = local_indx + M->start_indices[pe];
762     local_indx++;
763   }
764 
765 
766   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
767 
768   /*********************************************************/
769   /************** Set up the row structure *****************/
770   /*********************************************************/
771 
772   /* count number of nonzeros in every row */
773   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
774   for (i=rstart; i<rend; i++) {
775     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
776     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
777   }
778 
779   /* allocate buffers */
780   len = 0;
781   for (i=0; i<mnl; i++) {
782     if (len < num_ptr[i]) len = num_ptr[i];
783   }
784 
785   for (i=rstart; i<rend; i++) {
786     row_indx             = i-rstart;
787     len                  = num_ptr[row_indx];
788     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
789     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
790   }
791 
792   /* copy the matrix */
793   for (i=rstart; i<rend; i++) {
794     row_indx = i - rstart;
795     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
796     for (j=0; j<nz; j++) {
797       col = cols[j];
798       len = rows->len[row_indx]++;
799       rows->ptrs[row_indx][len] = mapping[col];
800       rows->A[row_indx][len]    = vals[j];
801     }
802     rows->slen[row_indx] = rows->len[row_indx];
803     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
804   }
805 
806 
807   /************************************************************/
808   /************** Set up the column structure *****************/
809   /*********************************************************/
810 
811   if (AT) {
812 
813     /* count number of nonzeros in every column */
814     for (i=rstart; i<rend; i++) {
815       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
816       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
817     }
818 
819     /* allocate buffers */
820     len = 0;
821     for (i=0; i<mnl; i++) {
822       if (len < num_ptr[i]) len = num_ptr[i];
823     }
824 
825     for (i=rstart; i<rend; i++) {
826       row_indx = i-rstart;
827       len      = num_ptr[row_indx];
828       rows->rptrs[row_indx]     = (int*)malloc(len*sizeof(int));
829     }
830 
831     /* copy the matrix (i.e., the structure) */
832     for (i=rstart; i<rend; i++) {
833       row_indx = i - rstart;
834       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
835       for (j=0; j<nz; j++) {
836 	col = cols[j];
837 	len = rows->rlen[row_indx]++;
838 	rows->rptrs[row_indx][len] = mapping[col];
839       }
840       ierr     = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
841     }
842   }
843 
844   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
845   ierr = PetscFree(mapping);CHKERRQ(ierr);
846 
847   order_pointers(M);
848   M->maxnz = calc_maxnz(M);
849 
850   *B = M;
851 
852   PetscFunctionReturn(0);
853 }
854 
855 /**********************************************************************/
856 
857 /*
858    Converts from an SPAI matrix B  to a PETSc matrix PB.
859    This assumes that the the SPAI matrix B is stored in
860    COMPRESSED-ROW format.
861 */
862 #undef __FUNCT__
863 #define __FUNCT__ "ConvertMatrixToMat"
864 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
865 {
866   int         size,rank;
867   PetscErrorCode ierr;
868   int         m,n,M,N;
869   int         d_nz,o_nz;
870   int         *d_nnz,*o_nnz;
871   int         i,k,global_row,global_col,first_diag_col,last_diag_col;
872   PetscScalar val;
873 
874   PetscFunctionBegin;
875   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
876   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
877 
878   m = n = B->mnls[rank];
879   d_nz = o_nz = 0;
880 
881   /* Determine preallocation for MatCreateMPIAIJ */
882   ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr);
883   ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr);
884   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
885   first_diag_col = B->start_indices[rank];
886   last_diag_col = first_diag_col + B->mnls[rank];
887   for (i=0; i<B->mnls[rank]; i++) {
888     for (k=0; k<B->lines->len[i]; k++) {
889       global_col = B->lines->ptrs[i][k];
890       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
891 	d_nnz[i]++;
892       else
893 	o_nnz[i]++;
894     }
895   }
896 
897   M = N = B->n;
898   /* Here we only know how to create AIJ format */
899   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
900   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
901   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
902   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
903   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
904 
905   for (i=0; i<B->mnls[rank]; i++) {
906     global_row = B->start_indices[rank]+i;
907     for (k=0; k<B->lines->len[i]; k++) {
908       global_col = B->lines->ptrs[i][k];
909       val = B->lines->A[i][k];
910       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
911     }
912   }
913 
914   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
915   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
916 
917   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
918   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
919 
920   PetscFunctionReturn(0);
921 }
922 
923 /**********************************************************************/
924 
925 /*
926    Converts from an SPAI vector v  to a PETSc vec Pv.
927 */
928 #undef __FUNCT__
929 #define __FUNCT__ "ConvertVectorToVec"
930 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
931 {
932   PetscErrorCode ierr;
933   int size,rank,m,M,i,*mnls,*start_indices,*global_indices;
934 
935   PetscFunctionBegin;
936   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
937   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
938 
939   m = v->mnl;
940   M = v->n;
941 
942 
943   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
944 
945   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
946   ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr);
947 
948   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
949   start_indices[0] = 0;
950   for (i=1; i<size; i++)
951     start_indices[i] = start_indices[i-1] +mnls[i-1];
952 
953   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
954   for (i=0; i<v->mnl; i++)
955     global_indices[i] = start_indices[rank] + i;
956 
957   ierr = PetscFree(mnls);CHKERRQ(ierr);
958   ierr = PetscFree(start_indices);CHKERRQ(ierr);
959 
960   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
961 
962   ierr = PetscFree(global_indices);CHKERRQ(ierr);
963 
964   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
965   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
966 
967   PetscFunctionReturn(0);
968 }
969 
970 
971 
972 
973