xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>              /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
8 
9 typedef struct {
10   SEQAIJHEADER(Mat);
11   SEQBAIJHEADER;
12   Mat *diags;
13 
14   Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
15 } Mat_BlockMat;
16 
17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
18 {
19   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
20   PetscScalar       *x;
21   const Mat         *v;
22   const PetscScalar *b;
23   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
24   const PetscInt    *idx;
25   IS                row,col;
26   MatFactorInfo     info;
27   Vec               left = a->left,right = a->right, middle = a->middle;
28   Mat               *diag;
29 
30   PetscFunctionBegin;
31   its = its*lits;
32   PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
33   PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
34   PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
35   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
36   if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
37     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
38   }
39 
40   if (!a->diags) {
41     CHKERRQ(PetscMalloc1(mbs,&a->diags));
42     CHKERRQ(MatFactorInfoInitialize(&info));
43     for (i=0; i<mbs; i++) {
44       CHKERRQ(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
45       CHKERRQ(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info));
46       CHKERRQ(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
47       CHKERRQ(ISDestroy(&row));
48       CHKERRQ(ISDestroy(&col));
49     }
50     CHKERRQ(VecDuplicate(bb,&a->workb));
51   }
52   diag = a->diags;
53 
54   CHKERRQ(VecSet(xx,0.0));
55   CHKERRQ(VecGetArray(xx,&x));
56   /* copy right hand side because it must be modified during iteration */
57   CHKERRQ(VecCopy(bb,a->workb));
58   CHKERRQ(VecGetArrayRead(a->workb,&b));
59 
60   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
61   while (its--) {
62     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
63 
64       for (i=0; i<mbs; i++) {
65         n   = a->i[i+1] - a->i[i] - 1;
66         idx = a->j + a->i[i] + 1;
67         v   = a->a + a->i[i] + 1;
68 
69         CHKERRQ(VecSet(left,0.0));
70         for (j=0; j<n; j++) {
71           CHKERRQ(VecPlaceArray(right,x + idx[j]*bs));
72           CHKERRQ(MatMultAdd(v[j],right,left,left));
73           CHKERRQ(VecResetArray(right));
74         }
75         CHKERRQ(VecPlaceArray(right,b + i*bs));
76         CHKERRQ(VecAYPX(left,-1.0,right));
77         CHKERRQ(VecResetArray(right));
78 
79         CHKERRQ(VecPlaceArray(right,x + i*bs));
80         CHKERRQ(MatSolve(diag[i],left,right));
81 
82         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
83         for (j=0; j<n; j++) {
84           CHKERRQ(MatMultTranspose(v[j],right,left));
85           CHKERRQ(VecPlaceArray(middle,b + idx[j]*bs));
86           CHKERRQ(VecAXPY(middle,-1.0,left));
87           CHKERRQ(VecResetArray(middle));
88         }
89         CHKERRQ(VecResetArray(right));
90 
91       }
92     }
93     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
94 
95       for (i=mbs-1; i>=0; i--) {
96         n   = a->i[i+1] - a->i[i] - 1;
97         idx = a->j + a->i[i] + 1;
98         v   = a->a + a->i[i] + 1;
99 
100         CHKERRQ(VecSet(left,0.0));
101         for (j=0; j<n; j++) {
102           CHKERRQ(VecPlaceArray(right,x + idx[j]*bs));
103           CHKERRQ(MatMultAdd(v[j],right,left,left));
104           CHKERRQ(VecResetArray(right));
105         }
106         CHKERRQ(VecPlaceArray(right,b + i*bs));
107         CHKERRQ(VecAYPX(left,-1.0,right));
108         CHKERRQ(VecResetArray(right));
109 
110         CHKERRQ(VecPlaceArray(right,x + i*bs));
111         CHKERRQ(MatSolve(diag[i],left,right));
112         CHKERRQ(VecResetArray(right));
113 
114       }
115     }
116   }
117   CHKERRQ(VecRestoreArray(xx,&x));
118   CHKERRQ(VecRestoreArrayRead(a->workb,&b));
119   PetscFunctionReturn(0);
120 }
121 
122 static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
123 {
124   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
125   PetscScalar       *x;
126   const Mat         *v;
127   const PetscScalar *b;
128   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
129   const PetscInt    *idx;
130   IS                row,col;
131   MatFactorInfo     info;
132   Vec               left = a->left,right = a->right;
133   Mat               *diag;
134 
135   PetscFunctionBegin;
136   its = its*lits;
137   PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
138   PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
139   PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
140   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
141 
142   if (!a->diags) {
143     CHKERRQ(PetscMalloc1(mbs,&a->diags));
144     CHKERRQ(MatFactorInfoInitialize(&info));
145     for (i=0; i<mbs; i++) {
146       CHKERRQ(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
147       CHKERRQ(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info));
148       CHKERRQ(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
149       CHKERRQ(ISDestroy(&row));
150       CHKERRQ(ISDestroy(&col));
151     }
152   }
153   diag = a->diags;
154 
155   CHKERRQ(VecSet(xx,0.0));
156   CHKERRQ(VecGetArray(xx,&x));
157   CHKERRQ(VecGetArrayRead(bb,&b));
158 
159   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
160   while (its--) {
161     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
162 
163       for (i=0; i<mbs; i++) {
164         n   = a->i[i+1] - a->i[i];
165         idx = a->j + a->i[i];
166         v   = a->a + a->i[i];
167 
168         CHKERRQ(VecSet(left,0.0));
169         for (j=0; j<n; j++) {
170           if (idx[j] != i) {
171             CHKERRQ(VecPlaceArray(right,x + idx[j]*bs));
172             CHKERRQ(MatMultAdd(v[j],right,left,left));
173             CHKERRQ(VecResetArray(right));
174           }
175         }
176         CHKERRQ(VecPlaceArray(right,b + i*bs));
177         CHKERRQ(VecAYPX(left,-1.0,right));
178         CHKERRQ(VecResetArray(right));
179 
180         CHKERRQ(VecPlaceArray(right,x + i*bs));
181         CHKERRQ(MatSolve(diag[i],left,right));
182         CHKERRQ(VecResetArray(right));
183       }
184     }
185     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
186 
187       for (i=mbs-1; i>=0; i--) {
188         n   = a->i[i+1] - a->i[i];
189         idx = a->j + a->i[i];
190         v   = a->a + a->i[i];
191 
192         CHKERRQ(VecSet(left,0.0));
193         for (j=0; j<n; j++) {
194           if (idx[j] != i) {
195             CHKERRQ(VecPlaceArray(right,x + idx[j]*bs));
196             CHKERRQ(MatMultAdd(v[j],right,left,left));
197             CHKERRQ(VecResetArray(right));
198           }
199         }
200         CHKERRQ(VecPlaceArray(right,b + i*bs));
201         CHKERRQ(VecAYPX(left,-1.0,right));
202         CHKERRQ(VecResetArray(right));
203 
204         CHKERRQ(VecPlaceArray(right,x + i*bs));
205         CHKERRQ(MatSolve(diag[i],left,right));
206         CHKERRQ(VecResetArray(right));
207 
208       }
209     }
210   }
211   CHKERRQ(VecRestoreArray(xx,&x));
212   CHKERRQ(VecRestoreArrayRead(bb,&b));
213   PetscFunctionReturn(0);
214 }
215 
216 static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
217 {
218   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
219   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
220   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
221   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
222   PetscInt       ridx,cidx;
223   PetscBool      roworiented=a->roworiented;
224   MatScalar      value;
225   Mat            *ap,*aa = a->a;
226 
227   PetscFunctionBegin;
228   for (k=0; k<m; k++) { /* loop over added rows */
229     row  = im[k];
230     brow = row/bs;
231     if (row < 0) continue;
232     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
233     rp   = aj + ai[brow];
234     ap   = aa + ai[brow];
235     rmax = imax[brow];
236     nrow = ailen[brow];
237     low  = 0;
238     high = nrow;
239     for (l=0; l<n; l++) { /* loop over added columns */
240       if (in[l] < 0) continue;
241       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
242       col = in[l]; bcol = col/bs;
243       if (A->symmetric && brow > bcol) continue;
244       ridx = row % bs; cidx = col % bs;
245       if (roworiented) value = v[l + k*n];
246       else value = v[k + l*m];
247 
248       if (col <= lastcol) low = 0;
249       else high = nrow;
250       lastcol = col;
251       while (high-low > 7) {
252         t = (low+high)/2;
253         if (rp[t] > bcol) high = t;
254         else              low  = t;
255       }
256       for (i=low; i<high; i++) {
257         if (rp[i] > bcol) break;
258         if (rp[i] == bcol) goto noinsert1;
259       }
260       if (nonew == 1) goto noinsert1;
261       PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
262       MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
263       N = nrow++ - 1; high++;
264       /* shift up all the later entries in this row */
265       for (ii=N; ii>=i; ii--) {
266         rp[ii+1] = rp[ii];
267         ap[ii+1] = ap[ii];
268       }
269       if (N>=i) ap[i] = NULL;
270       rp[i] = bcol;
271       a->nz++;
272       A->nonzerostate++;
273 noinsert1:;
274       if (!*(ap+i)) {
275         CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
276       }
277       CHKERRQ(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
278       low  = i;
279     }
280     ailen[brow] = nrow;
281   }
282   PetscFunctionReturn(0);
283 }
284 
285 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
286 {
287   PetscErrorCode    ierr;
288   Mat               tmpA;
289   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
290   const PetscInt    *cols;
291   const PetscScalar *values;
292   PetscBool         flg = PETSC_FALSE,notdone;
293   Mat_SeqAIJ        *a;
294   Mat_BlockMat      *amat;
295 
296   PetscFunctionBegin;
297   /* force binary viewer to load .info file if it has not yet done so */
298   CHKERRQ(PetscViewerSetUp(viewer));
299   CHKERRQ(MatCreate(PETSC_COMM_SELF,&tmpA));
300   CHKERRQ(MatSetType(tmpA,MATSEQAIJ));
301   CHKERRQ(MatLoad_SeqAIJ(tmpA,viewer));
302 
303   CHKERRQ(MatGetLocalSize(tmpA,&m,&n));
304   ierr = PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");CHKERRQ(ierr);
305   CHKERRQ(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
306   CHKERRQ(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
307   ierr = PetscOptionsEnd();CHKERRQ(ierr);
308 
309   /* Determine number of nonzero blocks for each block row */
310   a    = (Mat_SeqAIJ*) tmpA->data;
311   mbs  = m/bs;
312   CHKERRQ(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
313   CHKERRQ(PetscArrayzero(lens,mbs));
314 
315   for (i=0; i<mbs; i++) {
316     for (j=0; j<bs; j++) {
317       ii[j]    = a->j + a->i[i*bs + j];
318       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
319     }
320 
321     currentcol = -1;
322     while (PETSC_TRUE) {
323       notdone = PETSC_FALSE;
324       nextcol = 1000000000;
325       for (j=0; j<bs; j++) {
326         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
327           ii[j]++;
328           ilens[j]--;
329         }
330         if (ilens[j] > 0) {
331           notdone = PETSC_TRUE;
332           nextcol = PetscMin(nextcol,ii[j][0]/bs);
333         }
334       }
335       if (!notdone) break;
336       if (!flg || (nextcol >= i)) lens[i]++;
337       currentcol = nextcol;
338     }
339   }
340 
341   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
342     CHKERRQ(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
343   }
344   CHKERRQ(MatBlockMatSetPreallocation(newmat,bs,0,lens));
345   if (flg) {
346     CHKERRQ(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
347   }
348   amat = (Mat_BlockMat*)(newmat)->data;
349 
350   /* preallocate the submatrices */
351   CHKERRQ(PetscMalloc1(bs,&llens));
352   for (i=0; i<mbs; i++) { /* loops for block rows */
353     for (j=0; j<bs; j++) {
354       ii[j]    = a->j + a->i[i*bs + j];
355       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
356     }
357 
358     currentcol = 1000000000;
359     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
360       if (ilens[j] > 0) {
361         currentcol = PetscMin(currentcol,ii[j][0]/bs);
362       }
363     }
364 
365     while (PETSC_TRUE) {  /* loops over blocks in block row */
366       notdone = PETSC_FALSE;
367       nextcol = 1000000000;
368       CHKERRQ(PetscArrayzero(llens,bs));
369       for (j=0; j<bs; j++) { /* loop over rows in block */
370         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
371           ii[j]++;
372           ilens[j]--;
373           llens[j]++;
374         }
375         if (ilens[j] > 0) {
376           notdone = PETSC_TRUE;
377           nextcol = PetscMin(nextcol,ii[j][0]/bs);
378         }
379       }
380       PetscCheckFalse(cnt >= amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt);
381       if (!flg || currentcol >= i) {
382         amat->j[cnt] = currentcol;
383         CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++));
384       }
385 
386       if (!notdone) break;
387       currentcol = nextcol;
388     }
389     amat->ilen[i] = lens[i];
390   }
391 
392   CHKERRQ(PetscFree3(lens,ii,ilens));
393   CHKERRQ(PetscFree(llens));
394 
395   /* copy over the matrix, one row at a time */
396   for (i=0; i<m; i++) {
397     CHKERRQ(MatGetRow(tmpA,i,&ncols,&cols,&values));
398     CHKERRQ(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES));
399     CHKERRQ(MatRestoreRow(tmpA,i,&ncols,&cols,&values));
400   }
401   CHKERRQ(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
402   CHKERRQ(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
403   PetscFunctionReturn(0);
404 }
405 
406 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
407 {
408   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
409   const char        *name;
410   PetscViewerFormat format;
411 
412   PetscFunctionBegin;
413   CHKERRQ(PetscObjectGetName((PetscObject)A,&name));
414   CHKERRQ(PetscViewerGetFormat(viewer,&format));
415   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
416     CHKERRQ(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz));
417     if (A->symmetric) {
418       CHKERRQ(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n"));
419     }
420   }
421   PetscFunctionReturn(0);
422 }
423 
424 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
425 {
426   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
427   PetscInt       i;
428 
429   PetscFunctionBegin;
430   CHKERRQ(VecDestroy(&bmat->right));
431   CHKERRQ(VecDestroy(&bmat->left));
432   CHKERRQ(VecDestroy(&bmat->middle));
433   CHKERRQ(VecDestroy(&bmat->workb));
434   if (bmat->diags) {
435     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
436       CHKERRQ(MatDestroy(&bmat->diags[i]));
437     }
438   }
439   if (bmat->a) {
440     for (i=0; i<bmat->nz; i++) {
441       CHKERRQ(MatDestroy(&bmat->a[i]));
442     }
443   }
444   CHKERRQ(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
445   CHKERRQ(PetscFree(mat->data));
446   PetscFunctionReturn(0);
447 }
448 
449 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
450 {
451   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
452   PetscScalar    *xx,*yy;
453   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
454   Mat            *aa;
455 
456   PetscFunctionBegin;
457   /*
458      Standard CSR multiply except each entry is a Mat
459   */
460   CHKERRQ(VecGetArray(x,&xx));
461 
462   CHKERRQ(VecSet(y,0.0));
463   CHKERRQ(VecGetArray(y,&yy));
464   aj   = bmat->j;
465   aa   = bmat->a;
466   ii   = bmat->i;
467   for (i=0; i<m; i++) {
468     jrow = ii[i];
469     CHKERRQ(VecPlaceArray(bmat->left,yy + bs*i));
470     n    = ii[i+1] - jrow;
471     for (j=0; j<n; j++) {
472       CHKERRQ(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
473       CHKERRQ(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
474       CHKERRQ(VecResetArray(bmat->right));
475       jrow++;
476     }
477     CHKERRQ(VecResetArray(bmat->left));
478   }
479   CHKERRQ(VecRestoreArray(x,&xx));
480   CHKERRQ(VecRestoreArray(y,&yy));
481   PetscFunctionReturn(0);
482 }
483 
484 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
485 {
486   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
487   PetscScalar    *xx,*yy;
488   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
489   Mat            *aa;
490 
491   PetscFunctionBegin;
492   /*
493      Standard CSR multiply except each entry is a Mat
494   */
495   CHKERRQ(VecGetArray(x,&xx));
496 
497   CHKERRQ(VecSet(y,0.0));
498   CHKERRQ(VecGetArray(y,&yy));
499   aj   = bmat->j;
500   aa   = bmat->a;
501   ii   = bmat->i;
502   for (i=0; i<m; i++) {
503     jrow = ii[i];
504     n    = ii[i+1] - jrow;
505     CHKERRQ(VecPlaceArray(bmat->left,yy + bs*i));
506     CHKERRQ(VecPlaceArray(bmat->middle,xx + bs*i));
507     /* if we ALWAYS required a diagonal entry then could remove this if test */
508     if (aj[jrow] == i) {
509       CHKERRQ(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
510       CHKERRQ(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
511       CHKERRQ(VecResetArray(bmat->right));
512       jrow++;
513       n--;
514     }
515     for (j=0; j<n; j++) {
516       CHKERRQ(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));            /* upper triangular part */
517       CHKERRQ(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
518       CHKERRQ(VecResetArray(bmat->right));
519 
520       CHKERRQ(VecPlaceArray(bmat->right,yy + bs*aj[jrow]));            /* lower triangular part */
521       CHKERRQ(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
522       CHKERRQ(VecResetArray(bmat->right));
523       jrow++;
524     }
525     CHKERRQ(VecResetArray(bmat->left));
526     CHKERRQ(VecResetArray(bmat->middle));
527   }
528   CHKERRQ(VecRestoreArray(x,&xx));
529   CHKERRQ(VecRestoreArray(y,&yy));
530   PetscFunctionReturn(0);
531 }
532 
533 static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
534 {
535   PetscFunctionBegin;
536   PetscFunctionReturn(0);
537 }
538 
539 static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
540 {
541   PetscFunctionBegin;
542   PetscFunctionReturn(0);
543 }
544 
545 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
546 {
547   PetscFunctionBegin;
548   PetscFunctionReturn(0);
549 }
550 
551 /*
552      Adds diagonal pointers to sparse matrix structure.
553 */
554 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
555 {
556   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
557   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;
558 
559   PetscFunctionBegin;
560   if (!a->diag) {
561     CHKERRQ(PetscMalloc1(mbs,&a->diag));
562   }
563   for (i=0; i<mbs; i++) {
564     a->diag[i] = a->i[i+1];
565     for (j=a->i[i]; j<a->i[i+1]; j++) {
566       if (a->j[j] == i) {
567         a->diag[i] = j;
568         break;
569       }
570     }
571   }
572   PetscFunctionReturn(0);
573 }
574 
575 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
576 {
577   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
578   Mat_SeqAIJ     *c;
579   PetscInt       i,k,first,step,lensi,nrows,ncols;
580   PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
581   PetscScalar    *a_new;
582   Mat            C,*aa = a->a;
583   PetscBool      stride,equal;
584 
585   PetscFunctionBegin;
586   CHKERRQ(ISEqual(isrow,iscol,&equal));
587   PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
588   CHKERRQ(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
589   PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
590   CHKERRQ(ISStrideGetInfo(iscol,&first,&step));
591   PetscCheckFalse(step != A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");
592 
593   CHKERRQ(ISGetLocalSize(isrow,&nrows));
594   ncols = nrows;
595 
596   /* create submatrix */
597   if (scall == MAT_REUSE_MATRIX) {
598     PetscInt n_cols,n_rows;
599     C    = *B;
600     CHKERRQ(MatGetSize(C,&n_rows,&n_cols));
601     PetscCheckFalse(n_rows != nrows || n_cols != ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
602     CHKERRQ(MatZeroEntries(C));
603   } else {
604     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&C));
605     CHKERRQ(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
606     if (A->symmetric) {
607       CHKERRQ(MatSetType(C,MATSEQSBAIJ));
608     } else {
609       CHKERRQ(MatSetType(C,MATSEQAIJ));
610     }
611     CHKERRQ(MatSeqAIJSetPreallocation(C,0,ailen));
612     CHKERRQ(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
613   }
614   c = (Mat_SeqAIJ*)C->data;
615 
616   /* loop over rows inserting into submatrix */
617   a_new = c->a;
618   j_new = c->j;
619   i_new = c->i;
620 
621   for (i=0; i<nrows; i++) {
622     lensi = ailen[i];
623     for (k=0; k<lensi; k++) {
624       *j_new++ = *aj++;
625       CHKERRQ(MatGetValue(*aa++,first,first,a_new++));
626     }
627     i_new[i+1] = i_new[i] + lensi;
628     c->ilen[i] = lensi;
629   }
630 
631   CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
632   CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
633   *B   = C;
634   PetscFunctionReturn(0);
635 }
636 
637 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
638 {
639   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
640   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
641   PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
642   Mat            *aa    = a->a,*ap;
643 
644   PetscFunctionBegin;
645   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
646 
647   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
648   for (i=1; i<m; i++) {
649     /* move each row back by the amount of empty slots (fshift) before it*/
650     fshift += imax[i-1] - ailen[i-1];
651     rmax    = PetscMax(rmax,ailen[i]);
652     if (fshift) {
653       ip = aj + ai[i];
654       ap = aa + ai[i];
655       N  = ailen[i];
656       for (j=0; j<N; j++) {
657         ip[j-fshift] = ip[j];
658         ap[j-fshift] = ap[j];
659       }
660     }
661     ai[i] = ai[i-1] + ailen[i-1];
662   }
663   if (m) {
664     fshift += imax[m-1] - ailen[m-1];
665     ai[m]   = ai[m-1] + ailen[m-1];
666   }
667   /* reset ilen and imax for each row */
668   for (i=0; i<m; i++) {
669     ailen[i] = imax[i] = ai[i+1] - ai[i];
670   }
671   a->nz = ai[m];
672   for (i=0; i<a->nz; i++) {
673     PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
674     CHKERRQ(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
675     CHKERRQ(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
676   }
677   CHKERRQ(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
678   CHKERRQ(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
679   CHKERRQ(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));
680 
681   A->info.mallocs    += a->reallocs;
682   a->reallocs         = 0;
683   A->info.nz_unneeded = (double)fshift;
684   a->rmax             = rmax;
685   CHKERRQ(MatMarkDiagonal_BlockMat(A));
686   PetscFunctionReturn(0);
687 }
688 
689 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
690 {
691   PetscFunctionBegin;
692   if (opt == MAT_SYMMETRIC && flg) {
693     A->ops->sor  = MatSOR_BlockMat_Symmetric;
694     A->ops->mult = MatMult_BlockMat_Symmetric;
695   } else {
696     CHKERRQ(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
702                                        NULL,
703                                        NULL,
704                                        MatMult_BlockMat,
705                                /*  4*/ MatMultAdd_BlockMat,
706                                        MatMultTranspose_BlockMat,
707                                        MatMultTransposeAdd_BlockMat,
708                                        NULL,
709                                        NULL,
710                                        NULL,
711                                /* 10*/ NULL,
712                                        NULL,
713                                        NULL,
714                                        MatSOR_BlockMat,
715                                        NULL,
716                                /* 15*/ NULL,
717                                        NULL,
718                                        NULL,
719                                        NULL,
720                                        NULL,
721                                /* 20*/ NULL,
722                                        MatAssemblyEnd_BlockMat,
723                                        MatSetOption_BlockMat,
724                                        NULL,
725                                /* 24*/ NULL,
726                                        NULL,
727                                        NULL,
728                                        NULL,
729                                        NULL,
730                                /* 29*/ NULL,
731                                        NULL,
732                                        NULL,
733                                        NULL,
734                                        NULL,
735                                /* 34*/ NULL,
736                                        NULL,
737                                        NULL,
738                                        NULL,
739                                        NULL,
740                                /* 39*/ NULL,
741                                        NULL,
742                                        NULL,
743                                        NULL,
744                                        NULL,
745                                /* 44*/ NULL,
746                                        NULL,
747                                        MatShift_Basic,
748                                        NULL,
749                                        NULL,
750                                /* 49*/ NULL,
751                                        NULL,
752                                        NULL,
753                                        NULL,
754                                        NULL,
755                                /* 54*/ NULL,
756                                        NULL,
757                                        NULL,
758                                        NULL,
759                                        NULL,
760                                /* 59*/ MatCreateSubMatrix_BlockMat,
761                                        MatDestroy_BlockMat,
762                                        MatView_BlockMat,
763                                        NULL,
764                                        NULL,
765                                /* 64*/ NULL,
766                                        NULL,
767                                        NULL,
768                                        NULL,
769                                        NULL,
770                                /* 69*/ NULL,
771                                        NULL,
772                                        NULL,
773                                        NULL,
774                                        NULL,
775                                /* 74*/ NULL,
776                                        NULL,
777                                        NULL,
778                                        NULL,
779                                        NULL,
780                                /* 79*/ NULL,
781                                        NULL,
782                                        NULL,
783                                        NULL,
784                                        MatLoad_BlockMat,
785                                /* 84*/ NULL,
786                                        NULL,
787                                        NULL,
788                                        NULL,
789                                        NULL,
790                                /* 89*/ NULL,
791                                        NULL,
792                                        NULL,
793                                        NULL,
794                                        NULL,
795                                /* 94*/ NULL,
796                                        NULL,
797                                        NULL,
798                                        NULL,
799                                        NULL,
800                                /* 99*/ NULL,
801                                        NULL,
802                                        NULL,
803                                        NULL,
804                                        NULL,
805                                /*104*/ NULL,
806                                        NULL,
807                                        NULL,
808                                        NULL,
809                                        NULL,
810                                /*109*/ NULL,
811                                        NULL,
812                                        NULL,
813                                        NULL,
814                                        NULL,
815                                /*114*/ NULL,
816                                        NULL,
817                                        NULL,
818                                        NULL,
819                                        NULL,
820                                /*119*/ NULL,
821                                        NULL,
822                                        NULL,
823                                        NULL,
824                                        NULL,
825                                /*124*/ NULL,
826                                        NULL,
827                                        NULL,
828                                        NULL,
829                                        NULL,
830                                /*129*/ NULL,
831                                        NULL,
832                                        NULL,
833                                        NULL,
834                                        NULL,
835                                /*134*/ NULL,
836                                        NULL,
837                                        NULL,
838                                        NULL,
839                                        NULL,
840                                /*139*/ NULL,
841                                        NULL,
842                                        NULL,
843                                        NULL,
844                                        NULL,
845                                /*144*/ NULL,
846                                        NULL,
847                                        NULL,
848                                        NULL
849 };
850 
851 /*@C
852    MatBlockMatSetPreallocation - For good matrix assembly performance
853    the user should preallocate the matrix storage by setting the parameter nz
854    (or the array nnz).  By setting these parameters accurately, performance
855    during matrix assembly can be increased by more than a factor of 50.
856 
857    Collective
858 
859    Input Parameters:
860 +  B - The matrix
861 .  bs - size of each block in matrix
862 .  nz - number of nonzeros per block row (same for all rows)
863 -  nnz - array containing the number of nonzeros in the various block rows
864          (possibly different for each row) or NULL
865 
866    Notes:
867      If nnz is given then nz is ignored
868 
869    Specify the preallocated storage with either nz or nnz (not both).
870    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
871    allocation.  For large problems you MUST preallocate memory or you
872    will get TERRIBLE performance, see the users' manual chapter on matrices.
873 
874    Level: intermediate
875 
876 .seealso: MatCreate(), MatCreateBlockMat(), MatSetValues()
877 
878 @*/
879 PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
880 {
881   PetscFunctionBegin;
882   CHKERRQ(PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz)));
883   PetscFunctionReturn(0);
884 }
885 
886 static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
887 {
888   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
889   PetscInt       i;
890 
891   PetscFunctionBegin;
892   CHKERRQ(PetscLayoutSetBlockSize(A->rmap,bs));
893   CHKERRQ(PetscLayoutSetBlockSize(A->cmap,bs));
894   CHKERRQ(PetscLayoutSetUp(A->rmap));
895   CHKERRQ(PetscLayoutSetUp(A->cmap));
896   CHKERRQ(PetscLayoutGetBlockSize(A->rmap,&bs));
897 
898   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
899   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
900   if (nnz) {
901     for (i=0; i<A->rmap->n/bs; i++) {
902       PetscCheckFalse(nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
903       PetscCheckFalse(nnz[i] > A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
904     }
905   }
906   bmat->mbs = A->rmap->n/bs;
907 
908   CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
909   CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
910   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));
911 
912   if (!bmat->imax) {
913     CHKERRQ(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
914     CHKERRQ(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
915   }
916   if (PetscLikely(nnz)) {
917     nz = 0;
918     for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
919       bmat->imax[i] = nnz[i];
920       nz           += nnz[i];
921     }
922   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");
923 
924   /* bmat->ilen will count nonzeros in each row so far. */
925   CHKERRQ(PetscArrayzero(bmat->ilen,bmat->mbs));
926 
927   /* allocate the matrix space */
928   CHKERRQ(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
929   CHKERRQ(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
930   CHKERRQ(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
931   bmat->i[0] = 0;
932   for (i=1; i<bmat->mbs+1; i++) {
933     bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
934   }
935   bmat->singlemalloc = PETSC_TRUE;
936   bmat->free_a       = PETSC_TRUE;
937   bmat->free_ij      = PETSC_TRUE;
938 
939   bmat->nz            = 0;
940   bmat->maxnz         = nz;
941   A->info.nz_unneeded = (double)bmat->maxnz;
942   CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
943   PetscFunctionReturn(0);
944 }
945 
946 /*MC
947    MATBLOCKMAT - A matrix that is defined by a set of Mat's that represents a sparse block matrix
948                  consisting of (usually) sparse blocks.
949 
950   Level: advanced
951 
952 .seealso: MatCreateBlockMat()
953 
954 M*/
955 
956 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
957 {
958   Mat_BlockMat   *b;
959 
960   PetscFunctionBegin;
961   CHKERRQ(PetscNewLog(A,&b));
962   A->data = (void*)b;
963   CHKERRQ(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
964 
965   A->assembled    = PETSC_TRUE;
966   A->preallocated = PETSC_FALSE;
967   CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));
968 
969   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
970   PetscFunctionReturn(0);
971 }
972 
973 /*@C
974    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object
975 
976   Collective
977 
978    Input Parameters:
979 +  comm - MPI communicator
980 .  m - number of rows
981 .  n  - number of columns
982 .  bs - size of each submatrix
983 .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
984 -  nnz - expected number of nonzers per block row if known (use NULL otherwise)
985 
986    Output Parameter:
987 .  A - the matrix
988 
989    Level: intermediate
990 
991    Notes:
992     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
993    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
994 
995    For matrices containing parallel submatrices and variable block sizes, see MATNEST.
996 
997 .seealso: MATBLOCKMAT, MatCreateNest()
998 @*/
999 PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
1000 {
1001   PetscFunctionBegin;
1002   CHKERRQ(MatCreate(comm,A));
1003   CHKERRQ(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
1004   CHKERRQ(MatSetType(*A,MATBLOCKMAT));
1005   CHKERRQ(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
1006   PetscFunctionReturn(0);
1007 }
1008