xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision fb8e56e08d4d0bfe9fc63603ca1f7fddd68abbdb)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
37 
/* Private context for the SPAI preconditioner; created in PCCreate_SPAI(). */
typedef struct {

  matrix *B;                /* matrix in SPAI format */
  matrix *BT;               /* transpose of matrix in SPAI format */
  matrix *M;                /* the approximate inverse in SPAI format */

  Mat PM;                   /* the approximate inverse in PETSc format; applied by PCApply_SPAI() */

  double epsilon;           /* tolerance */
  int    nbsteps;           /* max number of "improvement" steps per line */
  int    max;               /* max dimensions of is_I, q, etc. */
  int    maxnew;            /* max number of new entries per step */
  int    block_size;        /* constant block size */
  int    cache_size;        /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;           /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;       /* communicator to be used with spai; duplicated in PCCreate_SPAI() */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_SPAI"
/*
   PCSetUp_SPAI - builds the SPAI approximate inverse of pc->pmat.

   The preconditioner matrix is converted to SPAI's own storage format,
   bspai() computes the approximate inverse M, and M is converted back to
   a PETSc Mat (ispai->PM) which PCApply_SPAI() then applies with MatMult().
   The intermediate SPAI-format matrices are freed before returning.
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  Mat            AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the matrix serves as its own transpose */
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
  } else {
    /* Use the transpose to get the column nonzero structure. */
    ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
    ierr = MatDestroy(&AT);CHKERRQ(ierr);
  }

  /* construct SPAI preconditioner; the bspai() arguments are: */
  /* FILE *messages */     /* file for warning messages */
  /* double epsilon */     /* tolerance */
  /* int nbsteps */        /* max number of "improvement" steps per line */
  /* int max */            /* max dimensions of is_I, q, etc. */
  /* int maxnew */         /* max number of new entries per step */
  /* int block_size */     /* block_size == 1 specifies scalar elments
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
  /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  ierr = bspai(ispai->B,&ispai->M,
               stdout,
               ispai->epsilon,
               ispai->nbsteps,
               ispai->max,
               ispai->maxnew,
               ispai->block_size,
               ispai->cache_size,
               ispai->verbose);CHKERRQ(ierr);

  /* translate the SPAI-format inverse into a PETSc Mat for PCApply_SPAI() */
  ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);

  /* free the SPAI matrices; only the PETSc copy ispai->PM is kept */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(0);
}
113 
114 /**********************************************************************/
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCApply_SPAI"
118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
119 {
120   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   /* Now using PETSc's multiply */
125   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 /**********************************************************************/
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "PCDestroy_SPAI"
133 static PetscErrorCode PCDestroy_SPAI(PC pc)
134 {
135   PetscErrorCode ierr;
136   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
140   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
141   ierr = PetscFree(pc->data);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 /**********************************************************************/
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "PCView_SPAI"
149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
150 {
151   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
152   PetscErrorCode ierr;
153   PetscBool      iascii;
154 
155   PetscFunctionBegin;
156   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
157   if (iascii) {
158     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
159     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
160     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
173 static PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
174 {
175   PC_SPAI *ispai = (PC_SPAI*)pc->data;
176 
177   PetscFunctionBegin;
178   ispai->epsilon = epsilon1;
179   PetscFunctionReturn(0);
180 }
181 
182 /**********************************************************************/
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
186 static PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
187 {
188   PC_SPAI *ispai = (PC_SPAI*)pc->data;
189 
190   PetscFunctionBegin;
191   ispai->nbsteps = nbsteps1;
192   PetscFunctionReturn(0);
193 }
194 
195 /**********************************************************************/
196 
197 /* added 1/7/99 g.h. */
198 #undef __FUNCT__
199 #define __FUNCT__ "PCSPAISetMax_SPAI"
200 static PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
201 {
202   PC_SPAI *ispai = (PC_SPAI*)pc->data;
203 
204   PetscFunctionBegin;
205   ispai->max = max1;
206   PetscFunctionReturn(0);
207 }
208 
209 /**********************************************************************/
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
213 static PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
214 {
215   PC_SPAI *ispai = (PC_SPAI*)pc->data;
216 
217   PetscFunctionBegin;
218   ispai->maxnew = maxnew1;
219   PetscFunctionReturn(0);
220 }
221 
222 /**********************************************************************/
223 
224 #undef __FUNCT__
225 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
226 static PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
227 {
228   PC_SPAI *ispai = (PC_SPAI*)pc->data;
229 
230   PetscFunctionBegin;
231   ispai->block_size = block_size1;
232   PetscFunctionReturn(0);
233 }
234 
235 /**********************************************************************/
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
239 static PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
240 {
241   PC_SPAI *ispai = (PC_SPAI*)pc->data;
242 
243   PetscFunctionBegin;
244   ispai->cache_size = cache_size;
245   PetscFunctionReturn(0);
246 }
247 
248 /**********************************************************************/
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
252 static PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
253 {
254   PC_SPAI *ispai = (PC_SPAI*)pc->data;
255 
256   PetscFunctionBegin;
257   ispai->verbose = verbose;
258   PetscFunctionReturn(0);
259 }
260 
261 /**********************************************************************/
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "PCSPAISetSp_SPAI"
265 static PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
266 {
267   PC_SPAI *ispai = (PC_SPAI*)pc->data;
268 
269   PetscFunctionBegin;
270   ispai->sp = sp;
271   PetscFunctionReturn(0);
272 }
273 
274 /* -------------------------------------------------------------------*/
275 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- epsilon1 - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
305 
306 /**********************************************************************/
307 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- nbsteps1 - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after this many steps, SPAI
                 simply uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
337 
338 /**********************************************************************/
339 
/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- max1 - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
363 
364 /**********************************************************************/
365 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
388 
389 /**********************************************************************/
390 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- block_size1 - block size (default 1)

  Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
430 
431 /**********************************************************************/
432 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- cache_size -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
459 
460 /**********************************************************************/
461 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- verbose - level (default 1)

  Notes: print parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
485 
486 /**********************************************************************/
487 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp"
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Notes: If A has a symmetric nonzero pattern use -sp 1 to
                 improve performance by eliminating some communication
                 in the parallel version. Even if A does not have a
                 symmetric nonzero pattern -sp 1 may well lead to good
                 results, but the code will not follow the published
                 SPAI algorithm exactly.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
517 
518 /**********************************************************************/
519 
520 /**********************************************************************/
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "PCSetFromOptions_SPAI"
524 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
525 {
526   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
527   PetscErrorCode ierr;
528   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
529   double         epsilon1;
530   PetscBool      flg;
531 
532   PetscFunctionBegin;
533   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
534   ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
535   if (flg) {
536     ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
537   }
538   ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
539   if (flg) {
540     ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
541   }
542   /* added 1/7/99 g.h. */
543   ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
544   if (flg) {
545     ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
546   }
547   ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
548   if (flg) {
549     ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
550   }
551   ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
552   if (flg) {
553     ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
554   }
555   ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
556   if (flg) {
557     ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
558   }
559   ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
560   if (flg) {
561     ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
562   }
563   ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
564   if (flg) {
565     ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
566   }
567   ierr = PetscOptionsTail();CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 /**********************************************************************/
572 
573 /*MC
574    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
575      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
576 
577    Options Database Keys:
578 +  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - verbose output
586 
587    Notes: This only works with AIJ matrices.
588 
589    Level: beginner
590 
591    Concepts: approximate inverse
592 
593 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
594     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
595     PCSPAISetVerbose(), PCSPAISetSp()
596 M*/
597 
#undef __FUNCT__
#define __FUNCT__ "PCCreate_SPAI"
/*
   PCCreate_SPAI - creates the private PC_SPAI context, fills in the PC
   function table, sets the default parameter values, duplicates the
   communicator for SPAI's use, and registers the PCSPAISetXXX_C methods.
*/
PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
{
  PC_SPAI        *ispai;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr     = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
  pc->data = ispai;

  pc->ops->destroy         = PCDestroy_SPAI;
  pc->ops->apply           = PCApply_SPAI;
  pc->ops->applyrichardson = 0;
  pc->ops->setup           = PCSetUp_SPAI;
  pc->ops->view            = PCView_SPAI;
  pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

  /* default parameter values; overridable via PCSPAISetXXX() or -pc_spai_* options */
  ispai->epsilon    = .4;
  ispai->nbsteps    = 5;
  ispai->max        = 5000;
  ispai->maxnew     = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose    = 0;

  ispai->sp = 1;
  /* SPAI gets its own communicator; freed in PCDestroy_SPAI() */
  ierr      = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
637 
638 /**********************************************************************/
639 
640 /*
641    Converts from a PETSc matrix to an SPAI matrix
642 */
643 #undef __FUNCT__
644 #define __FUNCT__ "ConvertMatToMatrix"
645 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
646 {
647   matrix                  *M;
648   int                     i,j,col;
649   int                     row_indx;
650   int                     len,pe,local_indx,start_indx;
651   int                     *mapping;
652   PetscErrorCode          ierr;
653   const int               *cols;
654   const double            *vals;
655   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
656   PetscMPIInt             size,rank;
657   struct compressed_lines *rows;
658 
659   PetscFunctionBegin;
660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
661   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
662   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
663   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
664 
665   /*
666     not sure why a barrier is required. commenting out
667   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
668   */
669 
670   M = new_matrix((SPAI_Comm)comm);
671 
672   M->n              = n;
673   M->bs             = 1;
674   M->max_block_size = 1;
675 
676   M->mnls          = (int*)malloc(sizeof(int)*size);
677   M->start_indices = (int*)malloc(sizeof(int)*size);
678   M->pe            = (int*)malloc(sizeof(int)*n);
679   M->block_sizes   = (int*)malloc(sizeof(int)*n);
680   for (i=0; i<n; i++) M->block_sizes[i] = 1;
681 
682   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
683 
684   M->start_indices[0] = 0;
685   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
686 
687   M->mnl            = M->mnls[M->myid];
688   M->my_start_index = M->start_indices[M->myid];
689 
690   for (i=0; i<size; i++) {
691     start_indx = M->start_indices[i];
692     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
693   }
694 
695   if (AT) {
696     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
697   } else {
698     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
699   }
700 
701   rows = M->lines;
702 
703   /* Determine the mapping from global indices to pointers */
704   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
705   pe         = 0;
706   local_indx = 0;
707   for (i=0; i<M->n; i++) {
708     if (local_indx >= M->mnls[pe]) {
709       pe++;
710       local_indx = 0;
711     }
712     mapping[i] = local_indx + M->start_indices[pe];
713     local_indx++;
714   }
715 
716 
717   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
718 
719   /*********************************************************/
720   /************** Set up the row structure *****************/
721   /*********************************************************/
722 
723   /* count number of nonzeros in every row */
724   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
725   for (i=rstart; i<rend; i++) {
726     ierr = MatGetRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
727     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
728   }
729 
730   /* allocate buffers */
731   len = 0;
732   for (i=0; i<mnl; i++) {
733     if (len < num_ptr[i]) len = num_ptr[i];
734   }
735 
736   for (i=rstart; i<rend; i++) {
737     row_indx             = i-rstart;
738     len                  = num_ptr[row_indx];
739     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
740     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
741   }
742 
743   /* copy the matrix */
744   for (i=rstart; i<rend; i++) {
745     row_indx = i - rstart;
746     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
747     for (j=0; j<nz; j++) {
748       col = cols[j];
749       len = rows->len[row_indx]++;
750 
751       rows->ptrs[row_indx][len] = mapping[col];
752       rows->A[row_indx][len]    = vals[j];
753     }
754     rows->slen[row_indx] = rows->len[row_indx];
755 
756     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
757   }
758 
759 
760   /************************************************************/
761   /************** Set up the column structure *****************/
762   /*********************************************************/
763 
764   if (AT) {
765 
766     /* count number of nonzeros in every column */
767     for (i=rstart; i<rend; i++) {
768       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
769       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
770     }
771 
772     /* allocate buffers */
773     len = 0;
774     for (i=0; i<mnl; i++) {
775       if (len < num_ptr[i]) len = num_ptr[i];
776     }
777 
778     for (i=rstart; i<rend; i++) {
779       row_indx = i-rstart;
780       len      = num_ptr[row_indx];
781 
782       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
783     }
784 
785     /* copy the matrix (i.e., the structure) */
786     for (i=rstart; i<rend; i++) {
787       row_indx = i - rstart;
788       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
789       for (j=0; j<nz; j++) {
790         col = cols[j];
791         len = rows->rlen[row_indx]++;
792 
793         rows->rptrs[row_indx][len] = mapping[col];
794       }
795       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
796     }
797   }
798 
799   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
800   ierr = PetscFree(mapping);CHKERRQ(ierr);
801 
802   order_pointers(M);
803   M->maxnz = calc_maxnz(M);
804   *B       = M;
805   PetscFunctionReturn(0);
806 }
807 
808 /**********************************************************************/
809 
810 /*
811    Converts from an SPAI matrix B  to a PETSc matrix PB.
812    This assumes that the the SPAI matrix B is stored in
813    COMPRESSED-ROW format.
814 */
815 #undef __FUNCT__
816 #define __FUNCT__ "ConvertMatrixToMat"
817 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
818 {
819   PetscMPIInt    size,rank;
820   PetscErrorCode ierr;
821   int            m,n,M,N;
822   int            d_nz,o_nz;
823   int            *d_nnz,*o_nnz;
824   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
825   PetscScalar    val;
826 
827   PetscFunctionBegin;
828   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
829   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
830 
831   m    = n = B->mnls[rank];
832   d_nz = o_nz = 0;
833 
834   /* Determine preallocation for MatCreateMPIAIJ */
835   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
836   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
837   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
838   first_diag_col = B->start_indices[rank];
839   last_diag_col  = first_diag_col + B->mnls[rank];
840   for (i=0; i<B->mnls[rank]; i++) {
841     for (k=0; k<B->lines->len[i]; k++) {
842       global_col = B->lines->ptrs[i][k];
843       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
844       else o_nnz[i]++;
845     }
846   }
847 
848   M = N = B->n;
849   /* Here we only know how to create AIJ format */
850   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
851   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
852   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
853   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
854   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
855 
856   for (i=0; i<B->mnls[rank]; i++) {
857     global_row = B->start_indices[rank]+i;
858     for (k=0; k<B->lines->len[i]; k++) {
859       global_col = B->lines->ptrs[i][k];
860 
861       val  = B->lines->A[i][k];
862       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
863     }
864   }
865 
866   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
867   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
868 
869   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
870   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 /**********************************************************************/
875 
876 /*
877    Converts from an SPAI vector v  to a PETSc vec Pv.
878 */
879 #undef __FUNCT__
880 #define __FUNCT__ "ConvertVectorToVec"
881 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
882 {
883   PetscErrorCode ierr;
884   PetscMPIInt    size,rank;
885   int            m,M,i,*mnls,*start_indices,*global_indices;
886 
887   PetscFunctionBegin;
888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
889   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
890 
891   m = v->mnl;
892   M = v->n;
893 
894 
895   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
896 
897   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
898   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
899 
900   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
901 
902   start_indices[0] = 0;
903   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
904 
905   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
906   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
907 
908   ierr = PetscFree(mnls);CHKERRQ(ierr);
909   ierr = PetscFree(start_indices);CHKERRQ(ierr);
910 
911   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
912   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
913   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
914 
915   ierr = PetscFree(global_indices);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 
920 
921 
922