xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21 
22 #include <petsc/private/pcimpl.h>        /*I "petscpc.h" I*/
23 #include <../src/ksp/pc/impls/spai/petscspai.h>
24 
25 /*
26     These are the SPAI include files
27 */
28 EXTERN_C_BEGIN
29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30 #include <spai.h>
31 #include <matrix.h>
32 EXTERN_C_END
33 
34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
38 
/*
   Private context for the SPAI preconditioner.
   Defaults are set in PCCreate_SPAI(); the tuning parameters are passed
   unchanged to Barnard's SPAI library via bspai() in PCSetUp_SPAI().
*/
typedef struct {

  matrix *B;                /* matrix in SPAI format */
  matrix *BT;               /* transpose of matrix in SPAI format */
  matrix *M;                /* the approximate inverse in SPAI format */

  Mat PM;                   /* the approximate inverse in PETSc format; applied by PCApply_SPAI() */

  double epsilon;           /* tolerance: controls quality of the approximate inverse */
  int    nbsteps;           /* max number of "improvement" steps per line */
  int    max;               /* max dimensions of is_I, q, etc. */
  int    maxnew;            /* max number of new entries per step */
  int    block_size;        /* constant block size (1 = scalar elements, 0 = variable blocks) */
  int    cache_size;        /* one of (1,2,3,4,5,6) indicating size of cache; 0 = no caching */
  int    verbose;           /* nonzero: SPAI prints timing and statistics */

  int      sp;              /* nonzero: matrix has a symmetric nonzero pattern */
  MPI_Comm comm_spai;       /* communicator to be used with spai */
} PC_SPAI;
58 
59 /**********************************************************************/
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "PCSetUp_SPAI"
63 static PetscErrorCode PCSetUp_SPAI(PC pc)
64 {
65   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
66   PetscErrorCode ierr;
67   Mat            AT;
68 
69   PetscFunctionBegin;
70   init_SPAI();
71 
72   if (ispai->sp) {
73     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
74   } else {
75     /* Use the transpose to get the column nonzero structure. */
76     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
77     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
78     ierr = MatDestroy(&AT);CHKERRQ(ierr);
79   }
80 
81   /* Destroy the transpose */
82   /* Don't know how to do it. PETSc developers? */
83 
84   /* construct SPAI preconditioner */
85   /* FILE *messages */     /* file for warning messages */
86   /* double epsilon */     /* tolerance */
87   /* int nbsteps */        /* max number of "improvement" steps per line */
88   /* int max */            /* max dimensions of is_I, q, etc. */
89   /* int maxnew */         /* max number of new entries per step */
90   /* int block_size */     /* block_size == 1 specifies scalar elments
91                               block_size == n specifies nxn constant-block elements
92                               block_size == 0 specifies variable-block elements */
93   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
94   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
95                               verbose == 1 prints timing and matrix statistics */
96 
97   ierr = bspai(ispai->B,&ispai->M,
98                stdout,
99                ispai->epsilon,
100                ispai->nbsteps,
101                ispai->max,
102                ispai->maxnew,
103                ispai->block_size,
104                ispai->cache_size,
105                ispai->verbose);CHKERRQ(ierr);
106 
107   ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);
108 
109   /* free the SPAI matrices */
110   sp_free_matrix(ispai->B);
111   sp_free_matrix(ispai->M);
112   PetscFunctionReturn(0);
113 }
114 
115 /**********************************************************************/
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "PCApply_SPAI"
119 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
120 {
121   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   /* Now using PETSc's multiply */
126   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
127   PetscFunctionReturn(0);
128 }
129 
130 /**********************************************************************/
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "PCDestroy_SPAI"
134 static PetscErrorCode PCDestroy_SPAI(PC pc)
135 {
136   PetscErrorCode ierr;
137   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
138 
139   PetscFunctionBegin;
140   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
141   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
142   ierr = PetscFree(pc->data);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 /**********************************************************************/
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "PCView_SPAI"
150 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
151 {
152   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
153   PetscErrorCode ierr;
154   PetscBool      iascii;
155 
156   PetscFunctionBegin;
157   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
158   if (iascii) {
159     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
160     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %g\n",   (double)ispai->epsilon);CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
174 static PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
175 {
176   PC_SPAI *ispai = (PC_SPAI*)pc->data;
177 
178   PetscFunctionBegin;
179   ispai->epsilon = epsilon1;
180   PetscFunctionReturn(0);
181 }
182 
183 /**********************************************************************/
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
187 static PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
188 {
189   PC_SPAI *ispai = (PC_SPAI*)pc->data;
190 
191   PetscFunctionBegin;
192   ispai->nbsteps = nbsteps1;
193   PetscFunctionReturn(0);
194 }
195 
196 /**********************************************************************/
197 
198 /* added 1/7/99 g.h. */
199 #undef __FUNCT__
200 #define __FUNCT__ "PCSPAISetMax_SPAI"
201 static PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
202 {
203   PC_SPAI *ispai = (PC_SPAI*)pc->data;
204 
205   PetscFunctionBegin;
206   ispai->max = max1;
207   PetscFunctionReturn(0);
208 }
209 
210 /**********************************************************************/
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
214 static PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
215 {
216   PC_SPAI *ispai = (PC_SPAI*)pc->data;
217 
218   PetscFunctionBegin;
219   ispai->maxnew = maxnew1;
220   PetscFunctionReturn(0);
221 }
222 
223 /**********************************************************************/
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
227 static PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
228 {
229   PC_SPAI *ispai = (PC_SPAI*)pc->data;
230 
231   PetscFunctionBegin;
232   ispai->block_size = block_size1;
233   PetscFunctionReturn(0);
234 }
235 
236 /**********************************************************************/
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
240 static PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
241 {
242   PC_SPAI *ispai = (PC_SPAI*)pc->data;
243 
244   PetscFunctionBegin;
245   ispai->cache_size = cache_size;
246   PetscFunctionReturn(0);
247 }
248 
249 /**********************************************************************/
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
253 static PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
254 {
255   PC_SPAI *ispai = (PC_SPAI*)pc->data;
256 
257   PetscFunctionBegin;
258   ispai->verbose = verbose;
259   PetscFunctionReturn(0);
260 }
261 
262 /**********************************************************************/
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "PCSPAISetSp_SPAI"
266 static PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
267 {
268   PC_SPAI *ispai = (PC_SPAI*)pc->data;
269 
270   PetscFunctionBegin;
271   ispai->sp = sp;
272   PetscFunctionReturn(0);
273 }
274 
275 /* -------------------------------------------------------------------*/
276 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- epsilon1 - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
  @*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
306 
307 /**********************************************************************/
308 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- nbsteps1 - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after nbsteps1 steps, SPAI simply
                 uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
338 
339 /**********************************************************************/
340 
/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers (is_I, q, etc.) in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- max1 - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
364 
365 /**********************************************************************/
366 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
389 
390 /**********************************************************************/
391 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- block_size1 - block size (default 1)

  Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
431 
432 /**********************************************************************/
433 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- cache_size - cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
460 
461 /**********************************************************************/
462 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- verbose - level (default 1)

  Notes: print parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
486 
487 /**********************************************************************/
488 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp"
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Notes: If A has a symmetric nonzero pattern use -pc_spai_sp 1 to
                 improve performance by eliminating some communication
                 in the parallel version. Even if A does not have a
                 symmetric nonzero pattern -pc_spai_sp 1 may well lead to good
                 results, but the code will not follow the published
                 SPAI algorithm exactly.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
518 
519 /**********************************************************************/
520 
521 /**********************************************************************/
522 
#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_SPAI"
/*
   PCSetFromOptions_SPAI - Queries the options database for every SPAI tuning
   parameter (-pc_spai_epsilon, -pc_spai_nbsteps, -pc_spai_max, -pc_spai_maxnew,
   -pc_spai_block_size, -pc_spai_cache_size, -pc_spai_verbose, -pc_spai_sp) and
   forwards each value that was set to the corresponding PCSPAISetXXX()
   routine, so any methods composed on the PC are respected.
*/
static PetscErrorCode PCSetFromOptions_SPAI(PetscOptions *PetscOptionsObject,PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
  double         epsilon1;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr);
  ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
  }
  /* added 1/7/99 g.h. */
  ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
571 
572 /**********************************************************************/
573 
574 /*MC
575    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
576      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
577 
578    Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - set verbosity level
587 
588    Notes: This only works with AIJ matrices.
589 
590    Level: beginner
591 
592    Concepts: approximate inverse
593 
594 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
595     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
596     PCSPAISetVerbose(), PCSPAISetSp()
597 M*/
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "PCCreate_SPAI"
601 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
602 {
603   PC_SPAI        *ispai;
604   PetscErrorCode ierr;
605 
606   PetscFunctionBegin;
607   ierr     = PetscNewLog(pc,&ispai);CHKERRQ(ierr);
608   pc->data = ispai;
609 
610   pc->ops->destroy         = PCDestroy_SPAI;
611   pc->ops->apply           = PCApply_SPAI;
612   pc->ops->applyrichardson = 0;
613   pc->ops->setup           = PCSetUp_SPAI;
614   pc->ops->view            = PCView_SPAI;
615   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
616 
617   ispai->epsilon    = .4;
618   ispai->nbsteps    = 5;
619   ispai->max        = 5000;
620   ispai->maxnew     = 5;
621   ispai->block_size = 1;
622   ispai->cache_size = 5;
623   ispai->verbose    = 0;
624 
625   ispai->sp = 1;
626   ierr      = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr);
627 
628   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
629   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
630   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr);
631   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
632   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
633   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
634   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
635   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 
639 /**********************************************************************/
640 
641 /*
642    Converts from a PETSc matrix to an SPAI matrix
643 */
644 #undef __FUNCT__
645 #define __FUNCT__ "ConvertMatToMatrix"
646 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
647 {
648   matrix                  *M;
649   int                     i,j,col;
650   int                     row_indx;
651   int                     len,pe,local_indx,start_indx;
652   int                     *mapping;
653   PetscErrorCode          ierr;
654   const int               *cols;
655   const double            *vals;
656   int                     n,mnl,nnl,nz,rstart,rend;
657   PetscMPIInt             size,rank;
658   struct compressed_lines *rows;
659 
660   PetscFunctionBegin;
661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
662   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
663   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
664   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
665 
666   /*
667     not sure why a barrier is required. commenting out
668   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
669   */
670 
671   M = new_matrix((SPAI_Comm)comm);
672 
673   M->n              = n;
674   M->bs             = 1;
675   M->max_block_size = 1;
676 
677   M->mnls          = (int*)malloc(sizeof(int)*size);
678   M->start_indices = (int*)malloc(sizeof(int)*size);
679   M->pe            = (int*)malloc(sizeof(int)*n);
680   M->block_sizes   = (int*)malloc(sizeof(int)*n);
681   for (i=0; i<n; i++) M->block_sizes[i] = 1;
682 
683   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
684 
685   M->start_indices[0] = 0;
686   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
687 
688   M->mnl            = M->mnls[M->myid];
689   M->my_start_index = M->start_indices[M->myid];
690 
691   for (i=0; i<size; i++) {
692     start_indx = M->start_indices[i];
693     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
694   }
695 
696   if (AT) {
697     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
698   } else {
699     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
700   }
701 
702   rows = M->lines;
703 
704   /* Determine the mapping from global indices to pointers */
705   ierr       = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr);
706   pe         = 0;
707   local_indx = 0;
708   for (i=0; i<M->n; i++) {
709     if (local_indx >= M->mnls[pe]) {
710       pe++;
711       local_indx = 0;
712     }
713     mapping[i] = local_indx + M->start_indices[pe];
714     local_indx++;
715   }
716 
717   /*********************************************************/
718   /************** Set up the row structure *****************/
719   /*********************************************************/
720 
721   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
722   for (i=rstart; i<rend; i++) {
723     row_indx = i - rstart;
724     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
725     /* allocate buffers */
726     rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int));
727     rows->A[row_indx]    = (double*)malloc(nz*sizeof(double));
728     /* copy the matrix */
729     for (j=0; j<nz; j++) {
730       col = cols[j];
731       len = rows->len[row_indx]++;
732 
733       rows->ptrs[row_indx][len] = mapping[col];
734       rows->A[row_indx][len]    = vals[j];
735     }
736     rows->slen[row_indx] = rows->len[row_indx];
737 
738     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
739   }
740 
741 
742   /************************************************************/
743   /************** Set up the column structure *****************/
744   /*********************************************************/
745 
746   if (AT) {
747 
748     for (i=rstart; i<rend; i++) {
749       row_indx = i - rstart;
750       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
751       /* allocate buffers */
752       rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int));
753       /* copy the matrix (i.e., the structure) */
754       for (j=0; j<nz; j++) {
755         col = cols[j];
756         len = rows->rlen[row_indx]++;
757 
758         rows->rptrs[row_indx][len] = mapping[col];
759       }
760       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
761     }
762   }
763 
764   ierr = PetscFree(mapping);CHKERRQ(ierr);
765 
766   order_pointers(M);
767   M->maxnz = calc_maxnz(M);
768   *B       = M;
769   PetscFunctionReturn(0);
770 }
771 
772 /**********************************************************************/
773 
774 /*
775    Converts from an SPAI matrix B  to a PETSc matrix PB.
776    This assumes that the SPAI matrix B is stored in
777    COMPRESSED-ROW format.
778 */
#undef __FUNCT__
#define __FUNCT__ "ConvertMatrixToMat"
/*
   ConvertMatrixToMat - Converts an SPAI matrix B (assumed stored in
   COMPRESSED-ROW format) into a newly created PETSc AIJ matrix *PB with the
   same parallel layout. The diagonal/off-diagonal nonzero counts are
   computed first so the AIJ matrix can be preallocated exactly.
*/
PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
{
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;
  int            m,n,M,N;
  int            d_nz,o_nz;               /* scalar counts; unused since the nnz arrays are given */
  int            *d_nnz,*o_nnz;           /* per-row diagonal/off-diagonal nonzero counts */
  int            i,k,global_row,global_col,first_diag_col,last_diag_col;
  PetscScalar    val;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  m    = n = B->mnls[rank];               /* local rows = local columns */
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateMPIAIJ: count, for each local row,
     the entries falling in [first_diag_col,last_diag_col) (diagonal block)
     versus everything else (off-diagonal block) */
  ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i=0; i<B->mnls[rank]; i++) {
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  ierr = MatCreate(comm,PB);CHKERRQ(ierr);
  ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
  /* whichever preallocation matches *PB's actual (Seq or MPI) type takes effect */
  ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

  /* copy the entries, one locally owned row at a time */
  for (i=0; i<B->mnls[rank]; i++) {
    global_row = B->start_indices[rank]+i;
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val  = B->lines->A[i][k];
      ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
    }
  }

  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = PetscFree(o_nnz);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
837 
838 /**********************************************************************/
839 
840 /*
841    Converts from an SPAI vector v  to a PETSc vec Pv.
842 */
843 #undef __FUNCT__
844 #define __FUNCT__ "ConvertVectorToVec"
845 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
846 {
847   PetscErrorCode ierr;
848   PetscMPIInt    size,rank;
849   int            m,M,i,*mnls,*start_indices,*global_indices;
850 
851   PetscFunctionBegin;
852   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
853   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
854 
855   m = v->mnl;
856   M = v->n;
857 
858 
859   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
860 
861   ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr);
862   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
863 
864   ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr);
865 
866   start_indices[0] = 0;
867   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
868 
869   ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr);
870   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
871 
872   ierr = PetscFree(mnls);CHKERRQ(ierr);
873   ierr = PetscFree(start_indices);CHKERRQ(ierr);
874 
875   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
876   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
877   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
878 
879   ierr = PetscFree(global_indices);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 
885 
886