xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
37 
/* Private context for the SPAI preconditioner; created in PCCreate_SPAI() */
typedef struct {

  matrix *B;                /* matrix in SPAI format */
  matrix *BT;               /* transpose of matrix in SPAI format */
  matrix *M;                /* the approximate inverse in SPAI format */

  Mat PM;                   /* the approximate inverse PETSc format */

  double epsilon;           /* tolerance */
  int    nbsteps;           /* max number of "improvement" steps per line */
  int    max;               /* max dimensions of is_I, q, etc. */
  int    maxnew;            /* max number of new entries per step */
  int    block_size;        /* constant block size */
  int    cache_size;        /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;           /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;       /* communicator to be used with spai */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "PCSetUp_SPAI"
62 static PetscErrorCode PCSetUp_SPAI(PC pc)
63 {
64   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
65   PetscErrorCode ierr;
66   Mat            AT;
67 
68   PetscFunctionBegin;
69   init_SPAI();
70 
71   if (ispai->sp) {
72     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
73   } else {
74     /* Use the transpose to get the column nonzero structure. */
75     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
76     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
77     ierr = MatDestroy(&AT);CHKERRQ(ierr);
78   }
79 
80   /* Destroy the transpose */
81   /* Don't know how to do it. PETSc developers? */
82 
83   /* construct SPAI preconditioner */
84   /* FILE *messages */     /* file for warning messages */
85   /* double epsilon */     /* tolerance */
86   /* int nbsteps */        /* max number of "improvement" steps per line */
87   /* int max */            /* max dimensions of is_I, q, etc. */
88   /* int maxnew */         /* max number of new entries per step */
89   /* int block_size */     /* block_size == 1 specifies scalar elments
90                               block_size == n specifies nxn constant-block elements
91                               block_size == 0 specifies variable-block elements */
92   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
93   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
94                               verbose == 1 prints timing and matrix statistics */
95 
96   ierr = bspai(ispai->B,&ispai->M,
97                stdout,
98                ispai->epsilon,
99                ispai->nbsteps,
100                ispai->max,
101                ispai->maxnew,
102                ispai->block_size,
103                ispai->cache_size,
104                ispai->verbose);CHKERRQ(ierr);
105 
106   ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
107 
108   /* free the SPAI matrices */
109   sp_free_matrix(ispai->B);
110   sp_free_matrix(ispai->M);
111   PetscFunctionReturn(0);
112 }
113 
114 /**********************************************************************/
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCApply_SPAI"
118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
119 {
120   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   /* Now using PETSc's multiply */
125   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 /**********************************************************************/
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "PCDestroy_SPAI"
133 static PetscErrorCode PCDestroy_SPAI(PC pc)
134 {
135   PetscErrorCode ierr;
136   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
140   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
141   ierr = PetscFree(pc->data);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 /**********************************************************************/
146 
#undef __FUNCT__
#define __FUNCT__ "PCView_SPAI"
/*
   PCView_SPAI - Prints the SPAI parameter settings to an ASCII viewer;
   other viewer types are silently ignored.
*/
static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
170 
171 EXTERN_C_BEGIN
172 #undef __FUNCT__
173 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
174 PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
175 {
176   PC_SPAI *ispai = (PC_SPAI*)pc->data;
177 
178   PetscFunctionBegin;
179   ispai->epsilon = epsilon1;
180   PetscFunctionReturn(0);
181 }
182 EXTERN_C_END
183 
184 /**********************************************************************/
185 
186 EXTERN_C_BEGIN
187 #undef __FUNCT__
188 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
189 PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
190 {
191   PC_SPAI *ispai = (PC_SPAI*)pc->data;
192 
193   PetscFunctionBegin;
194   ispai->nbsteps = nbsteps1;
195   PetscFunctionReturn(0);
196 }
197 EXTERN_C_END
198 
199 /**********************************************************************/
200 
201 /* added 1/7/99 g.h. */
202 EXTERN_C_BEGIN
203 #undef __FUNCT__
204 #define __FUNCT__ "PCSPAISetMax_SPAI"
205 PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
206 {
207   PC_SPAI *ispai = (PC_SPAI*)pc->data;
208 
209   PetscFunctionBegin;
210   ispai->max = max1;
211   PetscFunctionReturn(0);
212 }
213 EXTERN_C_END
214 
215 /**********************************************************************/
216 
217 EXTERN_C_BEGIN
218 #undef __FUNCT__
219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
220 PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221 {
222   PC_SPAI *ispai = (PC_SPAI*)pc->data;
223 
224   PetscFunctionBegin;
225   ispai->maxnew = maxnew1;
226   PetscFunctionReturn(0);
227 }
228 EXTERN_C_END
229 
230 /**********************************************************************/
231 
232 EXTERN_C_BEGIN
233 #undef __FUNCT__
234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
235 PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
236 {
237   PC_SPAI *ispai = (PC_SPAI*)pc->data;
238 
239   PetscFunctionBegin;
240   ispai->block_size = block_size1;
241   PetscFunctionReturn(0);
242 }
243 EXTERN_C_END
244 
245 /**********************************************************************/
246 
247 EXTERN_C_BEGIN
248 #undef __FUNCT__
249 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
250 PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
251 {
252   PC_SPAI *ispai = (PC_SPAI*)pc->data;
253 
254   PetscFunctionBegin;
255   ispai->cache_size = cache_size;
256   PetscFunctionReturn(0);
257 }
258 EXTERN_C_END
259 
260 /**********************************************************************/
261 
262 EXTERN_C_BEGIN
263 #undef __FUNCT__
264 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
265 PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
266 {
267   PC_SPAI *ispai = (PC_SPAI*)pc->data;
268 
269   PetscFunctionBegin;
270   ispai->verbose = verbose;
271   PetscFunctionReturn(0);
272 }
273 EXTERN_C_END
274 
275 /**********************************************************************/
276 
277 EXTERN_C_BEGIN
278 #undef __FUNCT__
279 #define __FUNCT__ "PCSPAISetSp_SPAI"
280 PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
281 {
282   PC_SPAI *ispai = (PC_SPAI*)pc->data;
283 
284   PetscFunctionBegin;
285   ispai->sp = sp;
286   PetscFunctionReturn(0);
287 }
288 EXTERN_C_END
289 
290 /* -------------------------------------------------------------------*/
291 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- epsilon1 - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
321 
322 /**********************************************************************/
323 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- nbsteps1 - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after ns steps, SPAI simply
                 uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
353 
354 /**********************************************************************/
355 
/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- max1 - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
379 
380 /**********************************************************************/
381 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
404 
405 /**********************************************************************/
406 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- block_size1 - block size (default 1)

  Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
446 
447 /**********************************************************************/
448 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- cache_size -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
475 
476 /**********************************************************************/
477 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- verbose - level (default 1)

  Notes: print parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
501 
502 /**********************************************************************/
503 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp"
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Notes: If A has a symmetric nonzero pattern use -sp 1 to
                 improve performance by eliminating some communication
                 in the parallel version. Even if A does not have a
                 symmetric nonzero pattern -sp 1 may well lead to good
                 results, but the code will not follow the published
                 SPAI algorithm exactly.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
533 
534 /**********************************************************************/
535 
536 /**********************************************************************/
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "PCSetFromOptions_SPAI"
540 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
541 {
542   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
543   PetscErrorCode ierr;
544   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
545   double         epsilon1;
546   PetscBool      flg;
547 
548   PetscFunctionBegin;
549   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
550   ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
551   if (flg) {
552     ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
553   }
554   ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
555   if (flg) {
556     ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
557   }
558   /* added 1/7/99 g.h. */
559   ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
560   if (flg) {
561     ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
562   }
563   ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
564   if (flg) {
565     ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
566   }
567   ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
568   if (flg) {
569     ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
570   }
571   ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
572   if (flg) {
573     ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
574   }
575   ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
576   if (flg) {
577     ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
578   }
579   ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
580   if (flg) {
581     ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
582   }
583   ierr = PetscOptionsTail();CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 /**********************************************************************/
588 
589 /*MC
590    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
591      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
592 
593    Options Database Keys:
594 +  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
596 .  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
598 .  -pc_spai_block_size <n> - set block size
599 .  -pc_spai_cache_size <n> - set cache size
600 .  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - verbose level (0 silent, 1 prints timing and statistics)
602 
603    Notes: This only works with AIJ matrices.
604 
605    Level: beginner
606 
607    Concepts: approximate inverse
608 
609 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
610     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
611     PCSPAISetVerbose(), PCSPAISetSp()
612 M*/
613 
614 EXTERN_C_BEGIN
615 #undef __FUNCT__
616 #define __FUNCT__ "PCCreate_SPAI"
617 PetscErrorCode  PCCreate_SPAI(PC pc)
618 {
619   PC_SPAI        *ispai;
620   PetscErrorCode ierr;
621 
622   PetscFunctionBegin;
623   ierr     = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
624   pc->data = ispai;
625 
626   pc->ops->destroy         = PCDestroy_SPAI;
627   pc->ops->apply           = PCApply_SPAI;
628   pc->ops->applyrichardson = 0;
629   pc->ops->setup           = PCSetUp_SPAI;
630   pc->ops->view            = PCView_SPAI;
631   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
632 
633   ispai->epsilon    = .4;
634   ispai->nbsteps    = 5;
635   ispai->max        = 5000;
636   ispai->maxnew     = 5;
637   ispai->block_size = 1;
638   ispai->cache_size = 5;
639   ispai->verbose    = 0;
640 
641   ispai->sp = 1;
642   ierr      = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
643 
644   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
645                                            "PCSPAISetEpsilon_SPAI",
646                                            PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
647   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
648                                            "PCSPAISetNBSteps_SPAI",
649                                            PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
650   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
651                                            "PCSPAISetMax_SPAI",
652                                            PCSPAISetMax_SPAI);CHKERRQ(ierr);
653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
654                                            "PCSPAISetMaxNew_SPAI",
655                                            PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
657                                            "PCSPAISetBlockSize_SPAI",
658                                            PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
660                                            "PCSPAISetCacheSize_SPAI",
661                                            PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
662   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
663                                            "PCSPAISetVerbose_SPAI",
664                                            PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
665   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
666                                            "PCSPAISetSp_SPAI",
667                                            PCSPAISetSp_SPAI);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 EXTERN_C_END
671 
672 /**********************************************************************/
673 
674 /*
675    Converts from a PETSc matrix to an SPAI matrix
676 */
677 #undef __FUNCT__
678 #define __FUNCT__ "ConvertMatToMatrix"
679 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
680 {
681   matrix                  *M;
682   int                     i,j,col;
683   int                     row_indx;
684   int                     len,pe,local_indx,start_indx;
685   int                     *mapping;
686   PetscErrorCode          ierr;
687   const int               *cols;
688   const double            *vals;
689   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
690   PetscMPIInt             size,rank;
691   struct compressed_lines *rows;
692 
693   PetscFunctionBegin;
694   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
695   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
696   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
697   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
698 
699   /*
700     not sure why a barrier is required. commenting out
701   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
702   */
703 
704   M = new_matrix((SPAI_Comm)comm);
705 
706   M->n              = n;
707   M->bs             = 1;
708   M->max_block_size = 1;
709 
710   M->mnls          = (int*)malloc(sizeof(int)*size);
711   M->start_indices = (int*)malloc(sizeof(int)*size);
712   M->pe            = (int*)malloc(sizeof(int)*n);
713   M->block_sizes   = (int*)malloc(sizeof(int)*n);
714   for (i=0; i<n; i++) M->block_sizes[i] = 1;
715 
716   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
717 
718   M->start_indices[0] = 0;
719   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
720 
721   M->mnl            = M->mnls[M->myid];
722   M->my_start_index = M->start_indices[M->myid];
723 
724   for (i=0; i<size; i++) {
725     start_indx = M->start_indices[i];
726     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
727   }
728 
729   if (AT) {
730     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
731   } else {
732     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
733   }
734 
735   rows = M->lines;
736 
737   /* Determine the mapping from global indices to pointers */
738   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
739   pe         = 0;
740   local_indx = 0;
741   for (i=0; i<M->n; i++) {
742     if (local_indx >= M->mnls[pe]) {
743       pe++;
744       local_indx = 0;
745     }
746     mapping[i] = local_indx + M->start_indices[pe];
747     local_indx++;
748   }
749 
750 
751   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
752 
753   /*********************************************************/
754   /************** Set up the row structure *****************/
755   /*********************************************************/
756 
757   /* count number of nonzeros in every row */
758   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
759   for (i=rstart; i<rend; i++) {
760     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
761     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
762   }
763 
764   /* allocate buffers */
765   len = 0;
766   for (i=0; i<mnl; i++) {
767     if (len < num_ptr[i]) len = num_ptr[i];
768   }
769 
770   for (i=rstart; i<rend; i++) {
771     row_indx             = i-rstart;
772     len                  = num_ptr[row_indx];
773     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
774     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
775   }
776 
777   /* copy the matrix */
778   for (i=rstart; i<rend; i++) {
779     row_indx = i - rstart;
780     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
781     for (j=0; j<nz; j++) {
782       col = cols[j];
783       len = rows->len[row_indx]++;
784 
785       rows->ptrs[row_indx][len] = mapping[col];
786       rows->A[row_indx][len]    = vals[j];
787     }
788     rows->slen[row_indx] = rows->len[row_indx];
789 
790     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
791   }
792 
793 
794   /************************************************************/
795   /************** Set up the column structure *****************/
796   /*********************************************************/
797 
798   if (AT) {
799 
800     /* count number of nonzeros in every column */
801     for (i=rstart; i<rend; i++) {
802       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
803       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
804     }
805 
806     /* allocate buffers */
807     len = 0;
808     for (i=0; i<mnl; i++) {
809       if (len < num_ptr[i]) len = num_ptr[i];
810     }
811 
812     for (i=rstart; i<rend; i++) {
813       row_indx = i-rstart;
814       len      = num_ptr[row_indx];
815 
816       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
817     }
818 
819     /* copy the matrix (i.e., the structure) */
820     for (i=rstart; i<rend; i++) {
821       row_indx = i - rstart;
822       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
823       for (j=0; j<nz; j++) {
824         col = cols[j];
825         len = rows->rlen[row_indx]++;
826 
827         rows->rptrs[row_indx][len] = mapping[col];
828       }
829       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
830     }
831   }
832 
833   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
834   ierr = PetscFree(mapping);CHKERRQ(ierr);
835 
836   order_pointers(M);
837   M->maxnz = calc_maxnz(M);
838   *B       = M;
839   PetscFunctionReturn(0);
840 }
841 
842 /**********************************************************************/
843 
844 /*
845    Converts from an SPAI matrix B  to a PETSc matrix PB.
846    This assumes that the the SPAI matrix B is stored in
847    COMPRESSED-ROW format.
848 */
#undef __FUNCT__
#define __FUNCT__ "ConvertMatrixToMat"
/*
   ConvertMatrixToMat - Converts an SPAI matrix B into a newly created
   PETSc AIJ matrix *PB.

   This assumes that the SPAI matrix B is stored in COMPRESSED-ROW format.
   The caller owns *PB and must destroy it with MatDestroy().
*/
PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
{
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;
  int            m,n,M,N;
  int            d_nz,o_nz;
  int            *d_nnz,*o_nnz;  /* per-row diagonal/off-diagonal counts */
  int            i,k,global_row,global_col,first_diag_col,last_diag_col;
  PetscScalar    val;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  m    = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateMPIAIJ: count each nonzero as
     diagonal-block or off-diagonal-block depending on its column */
  ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i=0; i<B->mnls[rank]; i++) {
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  ierr = MatCreate(comm,PB);CHKERRQ(ierr);
  ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
  /* preallocate both flavors; only the one matching the actual type is used */
  ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

  /* copy the entries, one row at a time */
  for (i=0; i<B->mnls[rank]; i++) {
    global_row = B->start_indices[rank]+i;
    for (k=0; k<B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val  = B->lines->A[i][k];
      ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
    }
  }

  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = PetscFree(o_nnz);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
907 
908 /**********************************************************************/
909 
910 /*
911    Converts from an SPAI vector v  to a PETSc vec Pv.
912 */
913 #undef __FUNCT__
914 #define __FUNCT__ "ConvertVectorToVec"
915 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
916 {
917   PetscErrorCode ierr;
918   PetscMPIInt    size,rank;
919   int            m,M,i,*mnls,*start_indices,*global_indices;
920 
921   PetscFunctionBegin;
922   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
923   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
924 
925   m = v->mnl;
926   M = v->n;
927 
928 
929   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
930 
931   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
932   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
933 
934   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
935 
936   start_indices[0] = 0;
937   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
938 
939   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
940   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
941 
942   ierr = PetscFree(mnls);CHKERRQ(ierr);
943   ierr = PetscFree(start_indices);CHKERRQ(ierr);
944 
945   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
946   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
947   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
948 
949   ierr = PetscFree(global_indices);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 
954 
955 
956