xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 22612f2f7cceb60caedd65384cdf99fc989f2aeb)
1 #define PETSCKSP_DLL
2 
3 /*
4    3/99 Modified by Stephen Barnard to support SPAI version 3.0
5 */
6 
7 /*
8       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9    Code written by Stephen Barnard.
10 
11       Note: there is some BAD memory bleeding below!
12 
13       This code needs work
14 
15    1) get rid of all memory bleeding
16    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
17       rather than having the sp flag for PC_SPAI
18 
19 */
20 
21 #include "private/pcimpl.h"        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #include "spai.h"
29 #include "matrix.h"
30 EXTERN_C_END
31 
32 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
33 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
34 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
35 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);
36 
37 typedef struct {
38 
39   matrix *B;              /* matrix in SPAI format */
40   matrix *BT;             /* transpose of matrix in SPAI format */
41   matrix *M;              /* the approximate inverse in SPAI format */
42 
43   Mat    PM;              /* the approximate inverse PETSc format */
44 
45   double epsilon;         /* tolerance */
46   int    nbsteps;         /* max number of "improvement" steps per line */
47   int    max;             /* max dimensions of is_I, q, etc. */
48   int    maxnew;          /* max number of new entries per step */
49   int    block_size;      /* constant block size */
50   int    cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
51   int    verbose;         /* SPAI prints timing and statistics */
52 
53   int    sp;              /* symmetric nonzero pattern */
54   MPI_Comm comm_spai;     /* communicator to be used with spai */
55 } PC_SPAI;
56 
57 /**********************************************************************/
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "PCSetUp_SPAI"
61 static PetscErrorCode PCSetUp_SPAI(PC pc)
62 {
63   PC_SPAI *ispai = (PC_SPAI*)pc->data;
64   PetscErrorCode ierr;
65   Mat      AT;
66 
67   PetscFunctionBegin;
68 
69   init_SPAI();
70 
71   if (ispai->sp) {
72     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
73   } else {
74     /* Use the transpose to get the column nonzero structure. */
75     ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr);
76     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
77     ierr = MatDestroy(AT);CHKERRQ(ierr);
78   }
79 
80   /* Destroy the transpose */
81   /* Don't know how to do it. PETSc developers? */
82 
83   /* construct SPAI preconditioner */
84   /* FILE *messages */     /* file for warning messages */
85   /* double epsilon */     /* tolerance */
86   /* int nbsteps */        /* max number of "improvement" steps per line */
87   /* int max */            /* max dimensions of is_I, q, etc. */
88   /* int maxnew */         /* max number of new entries per step */
89   /* int block_size */     /* block_size == 1 specifies scalar elments
90                               block_size == n specifies nxn constant-block elements
91                               block_size == 0 specifies variable-block elements */
92   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
93                            /* cache_size == 0 indicates no caching */
94   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
95                               verbose == 1 prints timing and matrix statistics */
96 
97   ierr = bspai(ispai->B,&ispai->M,
98 		   stdout,
99 		   ispai->epsilon,
100 		   ispai->nbsteps,
101 		   ispai->max,
102 		   ispai->maxnew,
103 		   ispai->block_size,
104 		   ispai->cache_size,
105 	       ispai->verbose); CHKERRQ(ierr);
106 
107   ierr = ConvertMatrixToMat(pc->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
108 
109   /* free the SPAI matrices */
110   sp_free_matrix(ispai->B);
111   sp_free_matrix(ispai->M);
112 
113   PetscFunctionReturn(0);
114 }
115 
116 /**********************************************************************/
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "PCApply_SPAI"
120 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
121 {
122   PC_SPAI *ispai = (PC_SPAI*)pc->data;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   /* Now using PETSc's multiply */
127   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /**********************************************************************/
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "PCDestroy_SPAI"
135 static PetscErrorCode PCDestroy_SPAI(PC pc)
136 {
137   PetscErrorCode ierr;
138   PC_SPAI *ispai = (PC_SPAI*)pc->data;
139 
140   PetscFunctionBegin;
141   if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);}
142   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
143   ierr = PetscFree(ispai);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 /**********************************************************************/
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "PCView_SPAI"
151 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
152 {
153   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
154   PetscErrorCode ierr;
155   PetscTruth iascii;
156 
157   PetscFunctionBegin;
158   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
159   if (iascii) {
160     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
168     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
169 
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 EXTERN_C_BEGIN
175 #undef __FUNCT__
176 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
177 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
178 {
179   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
180   PetscFunctionBegin;
181   ispai->epsilon = epsilon1;
182   PetscFunctionReturn(0);
183 }
184 EXTERN_C_END
185 
186 /**********************************************************************/
187 
188 EXTERN_C_BEGIN
189 #undef __FUNCT__
190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
191 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
192 {
193   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
194   PetscFunctionBegin;
195   ispai->nbsteps = nbsteps1;
196   PetscFunctionReturn(0);
197 }
198 EXTERN_C_END
199 
200 /**********************************************************************/
201 
202 /* added 1/7/99 g.h. */
203 EXTERN_C_BEGIN
204 #undef __FUNCT__
205 #define __FUNCT__ "PCSPAISetMax_SPAI"
206 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1)
207 {
208   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
209   PetscFunctionBegin;
210   ispai->max = max1;
211   PetscFunctionReturn(0);
212 }
213 EXTERN_C_END
214 
215 /**********************************************************************/
216 
217 EXTERN_C_BEGIN
218 #undef __FUNCT__
219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
220 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221 {
222   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
223   PetscFunctionBegin;
224   ispai->maxnew = maxnew1;
225   PetscFunctionReturn(0);
226 }
227 EXTERN_C_END
228 
229 /**********************************************************************/
230 
231 EXTERN_C_BEGIN
232 #undef __FUNCT__
233 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
234 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
235 {
236   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
237   PetscFunctionBegin;
238   ispai->block_size = block_size1;
239   PetscFunctionReturn(0);
240 }
241 EXTERN_C_END
242 
243 /**********************************************************************/
244 
245 EXTERN_C_BEGIN
246 #undef __FUNCT__
247 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
248 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
249 {
250   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
251   PetscFunctionBegin;
252   ispai->cache_size = cache_size;
253   PetscFunctionReturn(0);
254 }
255 EXTERN_C_END
256 
257 /**********************************************************************/
258 
259 EXTERN_C_BEGIN
260 #undef __FUNCT__
261 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
262 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose)
263 {
264   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
265   PetscFunctionBegin;
266   ispai->verbose = verbose;
267   PetscFunctionReturn(0);
268 }
269 EXTERN_C_END
270 
271 /**********************************************************************/
272 
273 EXTERN_C_BEGIN
274 #undef __FUNCT__
275 #define __FUNCT__ "PCSPAISetSp_SPAI"
276 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp)
277 {
278   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
279   PetscFunctionBegin;
280   ispai->sp = sp;
281   PetscFunctionReturn(0);
282 }
283 EXTERN_C_END
284 
285 /* -------------------------------------------------------------------*/
286 
287 #undef __FUNCT__
288 #define __FUNCT__ "PCSPAISetEpsilon"
289 /*@
290   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
291 
292   Input Parameters:
293 + pc - the preconditioner
294 - eps - epsilon (default .4)
295 
296   Notes:  Espilon must be between 0 and 1. It controls the
297                  quality of the approximation of M to the inverse of
298                  A. Higher values of epsilon lead to more work, more
299                  fill, and usually better preconditioners. In many
300                  cases the best choice of epsilon is the one that
301                  divides the total solution time equally between the
302                  preconditioner and the solver.
303 
304   Level: intermediate
305 
306 .seealso: PCSPAI, PCSetType()
307   @*/
308 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1)
309 {
310   PetscErrorCode ierr,(*f)(PC,double);
311   PetscFunctionBegin;
312   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr);
313   if (f) {
314     ierr = (*f)(pc,epsilon1);CHKERRQ(ierr);
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 /**********************************************************************/
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "PCSPAISetNBSteps"
323 /*@
324   PCSPAISetNBSteps - set maximum number of improvement steps per row in
325         the SPAI preconditioner
326 
327   Input Parameters:
328 + pc - the preconditioner
329 - n - number of steps (default 5)
330 
331   Notes:  SPAI constructs to approximation to every column of
332                  the exact inverse of A in a series of improvement
333                  steps. The quality of the approximation is determined
334                  by epsilon. If an approximation achieving an accuracy
335                  of epsilon is not obtained after ns steps, SPAI simply
336                  uses the best approximation constructed so far.
337 
338   Level: intermediate
339 
340 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
341 @*/
342 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1)
343 {
344   PetscErrorCode ierr,(*f)(PC,int);
345   PetscFunctionBegin;
346   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr);
347   if (f) {
348     ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr);
349   }
350   PetscFunctionReturn(0);
351 }
352 
353 /**********************************************************************/
354 
355 /* added 1/7/99 g.h. */
356 #undef __FUNCT__
357 #define __FUNCT__ "PCSPAISetMax"
358 /*@
359   PCSPAISetMax - set the size of various working buffers in
360         the SPAI preconditioner
361 
362   Input Parameters:
363 + pc - the preconditioner
364 - n - size (default is 5000)
365 
366   Level: intermediate
367 
368 .seealso: PCSPAI, PCSetType()
369 @*/
370 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1)
371 {
372   PetscErrorCode ierr,(*f)(PC,int);
373   PetscFunctionBegin;
374   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr);
375   if (f) {
376     ierr = (*f)(pc,max1);CHKERRQ(ierr);
377   }
378   PetscFunctionReturn(0);
379 }
380 
381 /**********************************************************************/
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "PCSPAISetMaxNew"
385 /*@
386   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
387    in SPAI preconditioner
388 
389   Input Parameters:
390 + pc - the preconditioner
391 - n - maximum number (default 5)
392 
393   Level: intermediate
394 
395 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
396 @*/
397 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1)
398 {
399   PetscErrorCode ierr,(*f)(PC,int);
400   PetscFunctionBegin;
401   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr);
402   if (f) {
403     ierr = (*f)(pc,maxnew1);CHKERRQ(ierr);
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 /**********************************************************************/
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCSPAISetBlockSize"
412 /*@
413   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
414 
415   Input Parameters:
416 + pc - the preconditioner
417 - n - block size (default 1)
418 
419   Notes: A block
420                  size of 1 treats A as a matrix of scalar elements. A
421                  block size of s > 1 treats A as a matrix of sxs
422                  blocks. A block size of 0 treats A as a matrix with
423                  variable sized blocks, which are determined by
424                  searching for dense square diagonal blocks in A.
425                  This can be very effective for finite-element
426                  matrices.
427 
428                  SPAI will convert A to block form, use a block
429                  version of the preconditioner algorithm, and then
430                  convert the result back to scalar form.
431 
432                  In many cases the a block-size parameter other than 1
433                  can lead to very significant improvement in
434                  performance.
435 
436 
437   Level: intermediate
438 
439 .seealso: PCSPAI, PCSetType()
440 @*/
441 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1)
442 {
443   PetscErrorCode ierr,(*f)(PC,int);
444   PetscFunctionBegin;
445   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr);
446   if (f) {
447     ierr = (*f)(pc,block_size1);CHKERRQ(ierr);
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 /**********************************************************************/
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "PCSPAISetCacheSize"
456 /*@
457   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
458 
459   Input Parameters:
460 + pc - the preconditioner
461 - n -  cache size {0,1,2,3,4,5} (default 5)
462 
463   Notes:    SPAI uses a hash table to cache messages and avoid
464                  redundant communication. If suggest always using
465                  5. This parameter is irrelevant in the serial
466                  version.
467 
468   Level: intermediate
469 
470 .seealso: PCSPAI, PCSetType()
471 @*/
472 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size)
473 {
474   PetscErrorCode ierr,(*f)(PC,int);
475   PetscFunctionBegin;
476   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr);
477   if (f) {
478     ierr = (*f)(pc,cache_size);CHKERRQ(ierr);
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 /**********************************************************************/
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCSPAISetVerbose"
487 /*@
488   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
489 
490   Input Parameters:
491 + pc - the preconditioner
492 - n - level (default 1)
493 
494   Notes: print parameters, timings and matrix statistics
495 
496   Level: intermediate
497 
498 .seealso: PCSPAI, PCSetType()
499 @*/
500 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose)
501 {
502   PetscErrorCode ierr,(*f)(PC,int);
503   PetscFunctionBegin;
504   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr);
505   if (f) {
506     ierr = (*f)(pc,verbose);CHKERRQ(ierr);
507   }
508   PetscFunctionReturn(0);
509 }
510 
511 /**********************************************************************/
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "PCSPAISetSp"
515 /*@
516   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
517 
518   Input Parameters:
519 + pc - the preconditioner
520 - n - 0 or 1
521 
522   Notes: If A has a symmetric nonzero pattern use -sp 1 to
523                  improve performance by eliminating some communication
524                  in the parallel version. Even if A does not have a
525                  symmetric nonzero pattern -sp 1 may well lead to good
526                  results, but the code will not follow the published
527                  SPAI algorithm exactly.
528 
529 
530   Level: intermediate
531 
532 .seealso: PCSPAI, PCSetType()
533 @*/
534 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp)
535 {
536   PetscErrorCode ierr,(*f)(PC,int);
537   PetscFunctionBegin;
538   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr);
539   if (f) {
540     ierr = (*f)(pc,sp);CHKERRQ(ierr);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 /**********************************************************************/
546 
547 /**********************************************************************/
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "PCSetFromOptions_SPAI"
551 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
552 {
553   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
554   PetscErrorCode ierr;
555   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
556   double         epsilon1;
557   PetscTruth     flg;
558 
559   PetscFunctionBegin;
560   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
561     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
562     if (flg) {
563       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
564     }
565     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
566     if (flg) {
567       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
568     }
569     /* added 1/7/99 g.h. */
570     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
571     if (flg) {
572       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
573     }
574     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
575     if (flg) {
576       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
577     }
578     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
579     if (flg) {
580       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
581     }
582     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
583     if (flg) {
584       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
585     }
586     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
587     if (flg) {
588       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
589     }
590     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
591     if (flg) {
592       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
593     }
594   ierr = PetscOptionsTail();CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 /**********************************************************************/
599 
600 /*MC
601    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
602      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
603 
604    Options Database Keys:
605 +  -pc_spai_set_epsilon <eps> - set tolerance
606 .  -pc_spai_nbstep <n> - set nbsteps
607 .  -pc_spai_max <m> - set max
608 .  -pc_spai_max_new <m> - set maxnew
609 .  -pc_spai_block_size <n> - set block size
610 .  -pc_spai_cache_size <n> - set cache size
611 .  -pc_spai_sp <m> - set sp
612 -  -pc_spai_set_verbose <true,false> - verbose output
613 
614    Notes: This only works with AIJ matrices.
615 
616    Level: beginner
617 
618    Concepts: approximate inverse
619 
620 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
621     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
622     PCSPAISetVerbose(), PCSPAISetSp()
623 M*/
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "PCCreate_SPAI"
628 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc)
629 {
630   PC_SPAI *ispai;
631   PetscErrorCode ierr;
632 
633   PetscFunctionBegin;
634   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
635   pc->data           = (void*)ispai;
636 
637   pc->ops->destroy         = PCDestroy_SPAI;
638   pc->ops->apply           = PCApply_SPAI;
639   pc->ops->applyrichardson = 0;
640   pc->ops->setup           = PCSetUp_SPAI;
641   pc->ops->view            = PCView_SPAI;
642   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
643 
644   pc->name          = 0;
645   ispai->epsilon    = .4;
646   ispai->nbsteps    = 5;
647   ispai->max        = 5000;
648   ispai->maxnew     = 5;
649   ispai->block_size = 1;
650   ispai->cache_size = 5;
651   ispai->verbose    = 0;
652 
653   ispai->sp         = 1;
654   ierr = MPI_Comm_dup(pc->comm,&(ispai->comm_spai));CHKERRQ(ierr);
655 
656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
657                     "PCSPAISetEpsilon_SPAI",
658                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
660                     "PCSPAISetNBSteps_SPAI",
661                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
662   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
663                     "PCSPAISetMax_SPAI",
664                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
665   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
666                     "PCSPAISetMaxNew_SPAI",
667                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
669                     "PCSPAISetBlockSize_SPAI",
670                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
671   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
672                     "PCSPAISetCacheSize_SPAI",
673                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
675                     "PCSPAISetVerbose_SPAI",
676                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
677   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
678                     "PCSPAISetSp_SPAI",
679                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
680 
681   PetscFunctionReturn(0);
682 }
683 EXTERN_C_END
684 
685 /**********************************************************************/
686 
687 /*
688    Converts from a PETSc matrix to an SPAI matrix
689 */
690 #undef __FUNCT__
691 #define __FUNCT__ "ConvertMatToMatrix"
692 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
693 {
694   matrix                  *M;
695   int                     i,j,col;
696   int                     row_indx;
697   int                     len,pe,local_indx,start_indx;
698   int                     *mapping;
699   PetscErrorCode ierr;
700   const int               *cols;
701   const double            *vals;
702   int                     *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend;
703   struct compressed_lines *rows;
704 
705   PetscFunctionBegin;
706 
707   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
709   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
710   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
711 
712   /*
713     not sure why a barrier is required. commenting out
714   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
715   */
716 
717   M = new_matrix((void*)comm);
718 
719   M->n = n;
720   M->bs = 1;
721   M->max_block_size = 1;
722 
723   M->mnls          = (int*)malloc(sizeof(int)*size);
724   M->start_indices = (int*)malloc(sizeof(int)*size);
725   M->pe            = (int*)malloc(sizeof(int)*n);
726   M->block_sizes   = (int*)malloc(sizeof(int)*n);
727   for (i=0; i<n; i++) M->block_sizes[i] = 1;
728 
729   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
730 
731   M->start_indices[0] = 0;
732   for (i=1; i<size; i++) {
733     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
734   }
735 
736   M->mnl = M->mnls[M->myid];
737   M->my_start_index = M->start_indices[M->myid];
738 
739   for (i=0; i<size; i++) {
740     start_indx = M->start_indices[i];
741     for (j=0; j<M->mnls[i]; j++)
742       M->pe[start_indx+j] = i;
743   }
744 
745   if (AT) {
746     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
747   } else {
748     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
749   }
750 
751   rows     = M->lines;
752 
753   /* Determine the mapping from global indices to pointers */
754   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
755   pe         = 0;
756   local_indx = 0;
757   for (i=0; i<M->n; i++) {
758     if (local_indx >= M->mnls[pe]) {
759       pe++;
760       local_indx = 0;
761     }
762     mapping[i] = local_indx + M->start_indices[pe];
763     local_indx++;
764   }
765 
766 
767   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
768 
769   /*********************************************************/
770   /************** Set up the row structure *****************/
771   /*********************************************************/
772 
773   /* count number of nonzeros in every row */
774   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
775   for (i=rstart; i<rend; i++) {
776     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
777     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
778   }
779 
780   /* allocate buffers */
781   len = 0;
782   for (i=0; i<mnl; i++) {
783     if (len < num_ptr[i]) len = num_ptr[i];
784   }
785 
786   for (i=rstart; i<rend; i++) {
787     row_indx             = i-rstart;
788     len                  = num_ptr[row_indx];
789     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
790     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
791   }
792 
793   /* copy the matrix */
794   for (i=rstart; i<rend; i++) {
795     row_indx = i - rstart;
796     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
797     for (j=0; j<nz; j++) {
798       col = cols[j];
799       len = rows->len[row_indx]++;
800       rows->ptrs[row_indx][len] = mapping[col];
801       rows->A[row_indx][len]    = vals[j];
802     }
803     rows->slen[row_indx] = rows->len[row_indx];
804     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
805   }
806 
807 
808   /************************************************************/
809   /************** Set up the column structure *****************/
810   /*********************************************************/
811 
812   if (AT) {
813 
814     /* count number of nonzeros in every column */
815     for (i=rstart; i<rend; i++) {
816       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
817       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
818     }
819 
820     /* allocate buffers */
821     len = 0;
822     for (i=0; i<mnl; i++) {
823       if (len < num_ptr[i]) len = num_ptr[i];
824     }
825 
826     for (i=rstart; i<rend; i++) {
827       row_indx = i-rstart;
828       len      = num_ptr[row_indx];
829       rows->rptrs[row_indx]     = (int*)malloc(len*sizeof(int));
830     }
831 
832     /* copy the matrix (i.e., the structure) */
833     for (i=rstart; i<rend; i++) {
834       row_indx = i - rstart;
835       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
836       for (j=0; j<nz; j++) {
837 	col = cols[j];
838 	len = rows->rlen[row_indx]++;
839 	rows->rptrs[row_indx][len] = mapping[col];
840       }
841       ierr     = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
842     }
843   }
844 
845   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
846   ierr = PetscFree(mapping);CHKERRQ(ierr);
847 
848   order_pointers(M);
849   M->maxnz = calc_maxnz(M);
850 
851   *B = M;
852 
853   PetscFunctionReturn(0);
854 }
855 
856 /**********************************************************************/
857 
858 /*
859    Converts from an SPAI matrix B  to a PETSc matrix PB.
860    This assumes that the the SPAI matrix B is stored in
861    COMPRESSED-ROW format.
862 */
863 #undef __FUNCT__
864 #define __FUNCT__ "ConvertMatrixToMat"
865 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
866 {
867   int         size,rank;
868   PetscErrorCode ierr;
869   int         m,n,M,N;
870   int         d_nz,o_nz;
871   int         *d_nnz,*o_nnz;
872   int         i,k,global_row,global_col,first_diag_col,last_diag_col;
873   PetscScalar val;
874 
875   PetscFunctionBegin;
876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
877   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
878 
879   m = n = B->mnls[rank];
880   d_nz = o_nz = 0;
881 
882   /* Determine preallocation for MatCreateMPIAIJ */
883   ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr);
884   ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr);
885   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
886   first_diag_col = B->start_indices[rank];
887   last_diag_col = first_diag_col + B->mnls[rank];
888   for (i=0; i<B->mnls[rank]; i++) {
889     for (k=0; k<B->lines->len[i]; k++) {
890       global_col = B->lines->ptrs[i][k];
891       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
892 	d_nnz[i]++;
893       else
894 	o_nnz[i]++;
895     }
896   }
897 
898   M = N = B->n;
899   /* Here we only know how to create AIJ format */
900   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
901   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
902   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
903   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
904   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
905 
906   for (i=0; i<B->mnls[rank]; i++) {
907     global_row = B->start_indices[rank]+i;
908     for (k=0; k<B->lines->len[i]; k++) {
909       global_col = B->lines->ptrs[i][k];
910       val = B->lines->A[i][k];
911       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
912     }
913   }
914 
915   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
916   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
917 
918   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
919   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
920 
921   PetscFunctionReturn(0);
922 }
923 
924 /**********************************************************************/
925 
926 /*
927    Converts from an SPAI vector v  to a PETSc vec Pv.
928 */
929 #undef __FUNCT__
930 #define __FUNCT__ "ConvertVectorToVec"
931 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
932 {
933   PetscErrorCode ierr;
934   int size,rank,m,M,i,*mnls,*start_indices,*global_indices;
935 
936   PetscFunctionBegin;
937   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
938   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
939 
940   m = v->mnl;
941   M = v->n;
942 
943 
944   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
945 
946   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
947   ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr);
948 
949   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
950   start_indices[0] = 0;
951   for (i=1; i<size; i++)
952     start_indices[i] = start_indices[i-1] +mnls[i-1];
953 
954   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
955   for (i=0; i<v->mnl; i++)
956     global_indices[i] = start_indices[rank] + i;
957 
958   ierr = PetscFree(mnls);CHKERRQ(ierr);
959   ierr = PetscFree(start_indices);CHKERRQ(ierr);
960 
961   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
962 
963   ierr = PetscFree(global_indices);CHKERRQ(ierr);
964 
965   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
966   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
967 
968   PetscFunctionReturn(0);
969 }
970 
971 
972 
973 
974