xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision aab5e2ee74cc0683d55b8d6ea66f630ffc349b4f)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>              /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
8 
/*
   Data structure for MATBLOCKMAT: CSR storage (shared with SeqAIJ/SeqBAIJ via
   the common headers) in which each stored "entry" is itself a bs x bs Mat.
   Fields referenced in this file include a->i, a->j, a->a (array of Mat),
   a->ilen, a->imax, a->nz, a->diag, a->mbs, a->maxnz, a->nonew,
   a->roworiented, a->reallocs and a->rmax.
*/
typedef struct {
  SEQAIJHEADER(Mat);   /* CSR arrays; the value array "a" holds Mat pointers instead of scalars */
  SEQBAIJHEADER;       /* blocked-format fields (diag, mbs, ...) */
  Mat *diags;          /* cached factorizations of the diagonal blocks, built lazily by MatSOR */

  Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
16 
17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
18 {
19   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
20   PetscScalar       *x;
21   const Mat         *v;
22   const PetscScalar *b;
23   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
24   const PetscInt    *idx;
25   IS                row,col;
26   MatFactorInfo     info;
27   Vec               left = a->left,right = a->right, middle = a->middle;
28   Mat               *diag;
29 
30   PetscFunctionBegin;
31   its = its*lits;
32   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
33   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
34   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
35   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
36   if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
37     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
38   }
39 
40   if (!a->diags) {
41     PetscCall(PetscMalloc1(mbs,&a->diags));
42     PetscCall(MatFactorInfoInitialize(&info));
43     for (i=0; i<mbs; i++) {
44       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
45       PetscCall(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info));
46       PetscCall(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
47       PetscCall(ISDestroy(&row));
48       PetscCall(ISDestroy(&col));
49     }
50     PetscCall(VecDuplicate(bb,&a->workb));
51   }
52   diag = a->diags;
53 
54   PetscCall(VecSet(xx,0.0));
55   PetscCall(VecGetArray(xx,&x));
56   /* copy right hand side because it must be modified during iteration */
57   PetscCall(VecCopy(bb,a->workb));
58   PetscCall(VecGetArrayRead(a->workb,&b));
59 
60   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
61   while (its--) {
62     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
63 
64       for (i=0; i<mbs; i++) {
65         n   = a->i[i+1] - a->i[i] - 1;
66         idx = a->j + a->i[i] + 1;
67         v   = a->a + a->i[i] + 1;
68 
69         PetscCall(VecSet(left,0.0));
70         for (j=0; j<n; j++) {
71           PetscCall(VecPlaceArray(right,x + idx[j]*bs));
72           PetscCall(MatMultAdd(v[j],right,left,left));
73           PetscCall(VecResetArray(right));
74         }
75         PetscCall(VecPlaceArray(right,b + i*bs));
76         PetscCall(VecAYPX(left,-1.0,right));
77         PetscCall(VecResetArray(right));
78 
79         PetscCall(VecPlaceArray(right,x + i*bs));
80         PetscCall(MatSolve(diag[i],left,right));
81 
82         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
83         for (j=0; j<n; j++) {
84           PetscCall(MatMultTranspose(v[j],right,left));
85           PetscCall(VecPlaceArray(middle,b + idx[j]*bs));
86           PetscCall(VecAXPY(middle,-1.0,left));
87           PetscCall(VecResetArray(middle));
88         }
89         PetscCall(VecResetArray(right));
90 
91       }
92     }
93     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
94 
95       for (i=mbs-1; i>=0; i--) {
96         n   = a->i[i+1] - a->i[i] - 1;
97         idx = a->j + a->i[i] + 1;
98         v   = a->a + a->i[i] + 1;
99 
100         PetscCall(VecSet(left,0.0));
101         for (j=0; j<n; j++) {
102           PetscCall(VecPlaceArray(right,x + idx[j]*bs));
103           PetscCall(MatMultAdd(v[j],right,left,left));
104           PetscCall(VecResetArray(right));
105         }
106         PetscCall(VecPlaceArray(right,b + i*bs));
107         PetscCall(VecAYPX(left,-1.0,right));
108         PetscCall(VecResetArray(right));
109 
110         PetscCall(VecPlaceArray(right,x + i*bs));
111         PetscCall(MatSolve(diag[i],left,right));
112         PetscCall(VecResetArray(right));
113 
114       }
115     }
116   }
117   PetscCall(VecRestoreArray(xx,&x));
118   PetscCall(VecRestoreArrayRead(a->workb,&b));
119   PetscFunctionReturn(0);
120 }
121 
122 static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
123 {
124   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
125   PetscScalar       *x;
126   const Mat         *v;
127   const PetscScalar *b;
128   PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
129   const PetscInt    *idx;
130   IS                row,col;
131   MatFactorInfo     info;
132   Vec               left = a->left,right = a->right;
133   Mat               *diag;
134 
135   PetscFunctionBegin;
136   its = its*lits;
137   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
138   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
139   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
140   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
141 
142   if (!a->diags) {
143     PetscCall(PetscMalloc1(mbs,&a->diags));
144     PetscCall(MatFactorInfoInitialize(&info));
145     for (i=0; i<mbs; i++) {
146       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
147       PetscCall(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info));
148       PetscCall(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
149       PetscCall(ISDestroy(&row));
150       PetscCall(ISDestroy(&col));
151     }
152   }
153   diag = a->diags;
154 
155   PetscCall(VecSet(xx,0.0));
156   PetscCall(VecGetArray(xx,&x));
157   PetscCall(VecGetArrayRead(bb,&b));
158 
159   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
160   while (its--) {
161     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
162 
163       for (i=0; i<mbs; i++) {
164         n   = a->i[i+1] - a->i[i];
165         idx = a->j + a->i[i];
166         v   = a->a + a->i[i];
167 
168         PetscCall(VecSet(left,0.0));
169         for (j=0; j<n; j++) {
170           if (idx[j] != i) {
171             PetscCall(VecPlaceArray(right,x + idx[j]*bs));
172             PetscCall(MatMultAdd(v[j],right,left,left));
173             PetscCall(VecResetArray(right));
174           }
175         }
176         PetscCall(VecPlaceArray(right,b + i*bs));
177         PetscCall(VecAYPX(left,-1.0,right));
178         PetscCall(VecResetArray(right));
179 
180         PetscCall(VecPlaceArray(right,x + i*bs));
181         PetscCall(MatSolve(diag[i],left,right));
182         PetscCall(VecResetArray(right));
183       }
184     }
185     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
186 
187       for (i=mbs-1; i>=0; i--) {
188         n   = a->i[i+1] - a->i[i];
189         idx = a->j + a->i[i];
190         v   = a->a + a->i[i];
191 
192         PetscCall(VecSet(left,0.0));
193         for (j=0; j<n; j++) {
194           if (idx[j] != i) {
195             PetscCall(VecPlaceArray(right,x + idx[j]*bs));
196             PetscCall(MatMultAdd(v[j],right,left,left));
197             PetscCall(VecResetArray(right));
198           }
199         }
200         PetscCall(VecPlaceArray(right,b + i*bs));
201         PetscCall(VecAYPX(left,-1.0,right));
202         PetscCall(VecResetArray(right));
203 
204         PetscCall(VecPlaceArray(right,x + i*bs));
205         PetscCall(MatSolve(diag[i],left,right));
206         PetscCall(VecResetArray(right));
207 
208       }
209     }
210   }
211   PetscCall(VecRestoreArray(xx,&x));
212   PetscCall(VecRestoreArrayRead(bb,&b));
213   PetscFunctionReturn(0);
214 }
215 
/*
   Inserts scalar values into the block matrix: each scalar (im[k], in[l]) is
   routed to position (row%bs, col%bs) of the bs x bs submatrix stored at block
   coordinate (row/bs, col/bs). New submatrices are created lazily as SeqAIJ.
   Uses the standard AIJ insertion scheme: binary search within the block row,
   shifting later entries up when a new block column must be inserted.
   Negative row/column indices are silently skipped, as in other MatSetValues
   implementations.
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
  PetscInt       ridx,cidx;
  PetscBool      roworiented=a->roworiented;
  MatScalar      value;
  Mat            *ap,*aa = a->a;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;
    if (row < 0) continue;
    PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];       /* block-column indices of block row brow */
    ap   = aa + ai[brow];       /* submatrices of block row brow */
    rmax = imax[brow];          /* allocated slots in this block row */
    nrow = ailen[brow];         /* currently occupied slots */
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col = in[l]; bcol = col/bs;
      /* for symmetric storage only the upper triangular block part is kept */
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs; cidx = col % bs;   /* position inside the bs x bs submatrix */
      if (roworiented) value = v[l + k*n];
      else value = v[k + l*m];

      /* reuse the previous search window while columns arrive in order */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      /* binary search narrows [low,high) to <= 7 entries, then linear scan */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;   /* block already present */
      }
      if (nonew == 1) goto noinsert1;        /* new nonzeros silently ignored */
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      if (N>=i) ap[i] = NULL;   /* vacated slot: submatrix created below */
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      /* create the bs x bs submatrix on first touch */
      if (!*(ap+i)) {
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
      }
      PetscCall(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
      low  = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}
284 
/*
   Loads a BlockMat from a binary viewer by first loading the matrix as SeqAIJ,
   then making two passes over its CSR structure:
     pass 1 counts the nonzero blocks per block row (lens[]),
     pass 2 preallocates each bs x bs submatrix with exact per-row counts (llens[]),
   and finally copies the values over row by row with MatSetValues().
   The block size and symmetric-storage flag are taken from the options
   -matload_block_size and -matload_symmetric.

   NOTE(review): when the symmetric option is set, pass 1 counts blocks with
   nextcol >= i while pass 2 creates them with currentcol >= i — presumably
   equivalent here, but worth confirming they always agree.
*/
static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
{
  Mat               tmpA;
  PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscBool         flg = PETSC_FALSE,notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(MatCreate(PETSC_COMM_SELF,&tmpA));
  PetscCall(MatSetType(tmpA,MATSEQAIJ));
  PetscCall(MatLoad_SeqAIJ(tmpA,viewer));

  PetscCall(MatGetLocalSize(tmpA,&m,&n));
  PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
  PetscCall(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
  PetscCall(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
  PetscOptionsEnd();

  /* Determine number of nonzero blocks for each block row */
  a    = (Mat_SeqAIJ*) tmpA->data;
  mbs  = m/bs;
  PetscCall(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
  PetscCall(PetscArrayzero(lens,mbs));

  for (i=0; i<mbs; i++) {
    /* ii[j]/ilens[j]: cursor and remaining length of scalar row i*bs+j */
    for (j=0; j<bs; j++) {
      ii[j]    = a->j + a->i[i*bs + j];
      ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
    }

    /* march all bs cursors forward in lockstep, advancing to the smallest
       block column not yet counted; stop when every row is exhausted */
    currentcol = -1;
    while (PETSC_TRUE) {
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      for (j=0; j<bs; j++) {
        while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol,ii[j][0]/bs);
        }
      }
      if (!notdone) break;
      /* in symmetric storage only upper-triangular blocks are counted */
      if (!flg || (nextcol >= i)) lens[i]++;
      currentcol = nextcol;
    }
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
    PetscCall(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatBlockMatSetPreallocation(newmat,bs,0,lens));
  if (flg) PetscCall(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
  amat = (Mat_BlockMat*)(newmat)->data;

  /* preallocate the submatrices */
  PetscCall(PetscMalloc1(bs,&llens));
  for (i=0; i<mbs; i++) { /* loops for block rows */
    for (j=0; j<bs; j++) {
      ii[j]    = a->j + a->i[i*bs + j];
      ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
    }

    currentcol = 1000000000;
    for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
      if (ilens[j] > 0) {
        currentcol = PetscMin(currentcol,ii[j][0]/bs);
      }
    }

    while (PETSC_TRUE) {  /* loops over blocks in block row */
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      /* llens[j] counts the scalar nonzeros in row j of the current block */
      PetscCall(PetscArrayzero(llens,bs));
      for (j=0; j<bs; j++) { /* loop over rows in block */
        while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
          ii[j]++;
          ilens[j]--;
          llens[j]++;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol,ii[j][0]/bs);
        }
      }
      PetscCheck(cnt < amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt);
      if (!flg || currentcol >= i) {
        amat->j[cnt] = currentcol;
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++));
      }

      if (!notdone) break;
      currentcol = nextcol;
    }
    amat->ilen[i] = lens[i];
  }

  PetscCall(PetscFree3(lens,ii,ilens));
  PetscCall(PetscFree(llens));

  /* copy over the matrix, one row at a time */
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow(tmpA,i,&ncols,&cols,&values));
    PetscCall(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(tmpA,i,&ncols,&cols,&values));
  }
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
402 
403 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
404 {
405   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
406   const char        *name;
407   PetscViewerFormat format;
408 
409   PetscFunctionBegin;
410   PetscCall(PetscObjectGetName((PetscObject)A,&name));
411   PetscCall(PetscViewerGetFormat(viewer,&format));
412   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
413     PetscCall(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz));
414     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n"));
415   }
416   PetscFunctionReturn(0);
417 }
418 
419 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
420 {
421   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
422   PetscInt       i;
423 
424   PetscFunctionBegin;
425   PetscCall(VecDestroy(&bmat->right));
426   PetscCall(VecDestroy(&bmat->left));
427   PetscCall(VecDestroy(&bmat->middle));
428   PetscCall(VecDestroy(&bmat->workb));
429   if (bmat->diags) {
430     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
431       PetscCall(MatDestroy(&bmat->diags[i]));
432     }
433   }
434   if (bmat->a) {
435     for (i=0; i<bmat->nz; i++) {
436       PetscCall(MatDestroy(&bmat->a[i]));
437     }
438   }
439   PetscCall(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
440   PetscCall(PetscFree(mat->data));
441   PetscFunctionReturn(0);
442 }
443 
444 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
445 {
446   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
447   PetscScalar    *xx,*yy;
448   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
449   Mat            *aa;
450 
451   PetscFunctionBegin;
452   /*
453      Standard CSR multiply except each entry is a Mat
454   */
455   PetscCall(VecGetArray(x,&xx));
456 
457   PetscCall(VecSet(y,0.0));
458   PetscCall(VecGetArray(y,&yy));
459   aj   = bmat->j;
460   aa   = bmat->a;
461   ii   = bmat->i;
462   for (i=0; i<m; i++) {
463     jrow = ii[i];
464     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
465     n    = ii[i+1] - jrow;
466     for (j=0; j<n; j++) {
467       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
468       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
469       PetscCall(VecResetArray(bmat->right));
470       jrow++;
471     }
472     PetscCall(VecResetArray(bmat->left));
473   }
474   PetscCall(VecRestoreArray(x,&xx));
475   PetscCall(VecRestoreArray(y,&yy));
476   PetscFunctionReturn(0);
477 }
478 
/*
   y = A*x when only the upper triangular block part of A is stored.
   For each stored off-diagonal block A[i][j] (j > i) two contributions
   are accumulated: A[i][j]*x[j] into y[i] (upper part) and
   A[i][j]^T*x[i] into y[j] (implicit lower part).
*/
PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
{
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscScalar    *xx,*yy;
  PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
  Mat            *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x,&xx));

  PetscCall(VecSet(y,0.0));
  PetscCall(VecGetArray(y,&yy));
  aj   = bmat->j;
  aa   = bmat->a;
  ii   = bmat->i;
  for (i=0; i<m; i++) {
    jrow = ii[i];
    n    = ii[i+1] - jrow;
    PetscCall(VecPlaceArray(bmat->left,yy + bs*i));    /* y[i]: accumulation target */
    PetscCall(VecPlaceArray(bmat->middle,xx + bs*i));  /* x[i]: used for the transpose products */
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      /* diagonal block contributes only once, with no transpose counterpart */
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j=0; j<n; j++) {
      PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));            /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right,yy + bs*aj[jrow]));            /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x,&xx));
  PetscCall(VecRestoreArray(y,&yy));
  PetscFunctionReturn(0);
}
527 
528 static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
529 {
530   PetscFunctionBegin;
531   PetscFunctionReturn(0);
532 }
533 
534 static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
535 {
536   PetscFunctionBegin;
537   PetscFunctionReturn(0);
538 }
539 
540 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
541 {
542   PetscFunctionBegin;
543   PetscFunctionReturn(0);
544 }
545 
546 /*
547      Adds diagonal pointers to sparse matrix structure.
548 */
549 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
550 {
551   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
552   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;
553 
554   PetscFunctionBegin;
555   if (!a->diag) {
556     PetscCall(PetscMalloc1(mbs,&a->diag));
557   }
558   for (i=0; i<mbs; i++) {
559     a->diag[i] = a->i[i+1];
560     for (j=a->i[i]; j<a->i[i+1]; j++) {
561       if (a->j[j] == i) {
562         a->diag[i] = j;
563         break;
564       }
565     }
566   }
567   PetscFunctionReturn(0);
568 }
569 
/*
   Extracts a scalar submatrix by selecting entry (first,first) from every
   stored block, where isrow == iscol must be a stride IS whose step equals
   the block size (i.e. one scalar per block). The result is SeqAIJ, or
   SeqSBAIJ when the BlockMat is stored symmetrically.

   NOTE(review): writes directly into the Mat_SeqAIJ arrays of C rather than
   using MatSetValues; this relies on C's preallocation (from ailen) exactly
   matching the BlockMat's row lengths.
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ     *c;
  PetscInt       i,k,first,step,lensi,nrows,ncols;
  PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
  PetscScalar    *a_new;
  Mat            C,*aa = a->a;
  PetscBool      stride,equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow,iscol,&equal));
  PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
  PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol,&first,&step));
  PetscCheck(step == A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow,&nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C    = *B;
    PetscCall(MatGetSize(C,&n_rows,&n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
    PetscCall(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C,MATSEQSBAIJ));
    else PetscCall(MatSetType(C,MATSEQAIJ));
    /* only the preallocation matching C's actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(C,0,ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
  }
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      /* block column index carries over; the value is entry (first,first) of the block */
      *j_new++ = *aj++;
      PetscCall(MatGetValue(*aa++,first,first,a_new++));
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  *B   = C;
  PetscFunctionReturn(0);
}
628 
/*
   Final assembly: compresses out the unused preallocated slots in each block
   row (standard AIJ squeeze), resets ilen/imax to the exact row lengths,
   assembles every stored submatrix, and marks the diagonal blocks.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat            *aa    = a->a,*ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    /* ai[i] must be rewritten only after row i's data has been shifted */
    ai[i] = ai[i-1] + ailen[i-1];
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for (i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  /* every stored slot must hold a created submatrix; assemble each one */
  for (i=0; i<a->nz; i++) {
    PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
    PetscCall(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
  PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
  PetscCall(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));

  A->info.mallocs    += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}
680 
681 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
682 {
683   PetscFunctionBegin;
684   if (opt == MAT_SYMMETRIC && flg) {
685     A->ops->sor  = MatSOR_BlockMat_Symmetric;
686     A->ops->mult = MatMult_BlockMat_Symmetric;
687   } else {
688     PetscCall(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
689   }
690   PetscFunctionReturn(0);
691 }
692 
/*
   Function dispatch table for MATBLOCKMAT; slot numbers follow struct _MatOps.
   Only a handful of operations are implemented: set-values, mult (+ stubbed
   transpose variants), SOR, assembly-end, set-option, shift, create-submatrix,
   destroy, view and load.
*/
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                               /*  4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                               /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL
};
844 
845 /*@C
846    MatBlockMatSetPreallocation - For good matrix assembly performance
847    the user should preallocate the matrix storage by setting the parameter nz
848    (or the array nnz).  By setting these parameters accurately, performance
849    during matrix assembly can be increased by more than a factor of 50.
850 
851    Collective
852 
853    Input Parameters:
854 +  B - The matrix
855 .  bs - size of each block in matrix
856 .  nz - number of nonzeros per block row (same for all rows)
857 -  nnz - array containing the number of nonzeros in the various block rows
858          (possibly different for each row) or NULL
859 
860    Notes:
861      If nnz is given then nz is ignored
862 
863    Specify the preallocated storage with either nz or nnz (not both).
864    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
865    allocation.  For large problems you MUST preallocate memory or you
866    will get TERRIBLE performance, see the users' manual chapter on matrices.
867 
868    Level: intermediate
869 
870 .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
871 
872 @*/
PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on the object
     (MatBlockMatSetPreallocation_BlockMat for MATBLOCKMAT); PetscTryMethod
     is a silent no-op if B is not of a type that composed the method. */
  PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
  PetscFunctionReturn(0);
}
879 
/*
   MatBlockMatSetPreallocation_BlockMat - Type-specific preallocation for MATBLOCKMAT.

   Sets the block size on both layouts, validates the per-block-row nonzero
   counts, creates the work vectors used by the multiply/SOR kernels, and
   allocates the CSR-of-blocks storage (bmat->a holds Mat pointers, bmat->j
   block-column indices, bmat->i block-row offsets).

   Note: nnz is currently REQUIRED; passing NULL errors out below, so the
   nz argument only sizes nothing on its own (see the PetscLikely branch).
*/
static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
{
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscInt       i;

  PetscFunctionBegin;
  /* Impose the block size on the layouts, then read it back: PetscLayoutSetUp()
     may resolve a PETSC_DECIDE/negative bs into the actual value used. */
  PetscCall(PetscLayoutSetBlockSize(A->rmap,bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap,bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap,&bs));

  /* Default of 5 nonzero blocks per row; currently unreachable in effect since
     nnz == NULL errors below — kept for when nz-only preallocation is supported. */
  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
  if (nnz) {
    /* Each block row may hold at most cmap->n/bs block entries */
    for (i=0; i<A->rmap->n/bs; i++) {
      PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
    }
  }
  bmat->mbs = A->rmap->n/bs;  /* number of block rows */

  /* Work vectors for applying the blocks to bs-sized slices of full vectors;
     right/middle wrap caller-supplied arrays (placed later), left owns storage. */
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));

  if (!bmat->imax) {
    /* NOTE(review): sized by A->rmap->n (point rows) though only mbs block-row
       entries appear to be used below — presumably intentional slack; confirm
       against the assembly routines before shrinking. */
    PetscCall(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
    PetscCall(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
  }
  if (PetscLikely(nnz)) {
    /* Record the per-block-row capacities and total the block count */
    nz = 0;
    for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz           += nnz[i];
    }
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen,bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
  PetscCall(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
  PetscCall(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
  /* Build the block-row offsets from the capacities (CSR row-pointer style) */
  bmat->i[0] = 0;
  for (i=1; i<bmat->mbs+1; i++) {
    bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
  }
  bmat->singlemalloc = PETSC_TRUE;   /* a, j, i came from one PetscMalloc3 */
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;           /* no blocks inserted yet */
  bmat->maxnz         = nz;          /* total preallocated block slots */
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
939 
/*MC
   MATBLOCKMAT - A matrix that is defined by a set of Mat objects that together represent a sparse block matrix
                 consisting of (usually) sparse blocks.

  Level: advanced

.seealso: `MatCreateBlockMat()`

M*/
949 
950 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
951 {
952   Mat_BlockMat   *b;
953 
954   PetscFunctionBegin;
955   PetscCall(PetscNewLog(A,&b));
956   A->data = (void*)b;
957   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
958 
959   A->assembled    = PETSC_TRUE;
960   A->preallocated = PETSC_FALSE;
961   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));
962 
963   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
964   PetscFunctionReturn(0);
965 }
966 
967 /*@C
968    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object
969 
970   Collective
971 
972    Input Parameters:
973 +  comm - MPI communicator
974 .  m - number of rows
975 .  n  - number of columns
976 .  bs - size of each submatrix
977 .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzeros per block row if known (use NULL otherwise)
979 
980    Output Parameter:
981 .  A - the matrix
982 
983    Level: intermediate
984 
985    Notes:
986     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
987    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
988 
989    For matrices containing parallel submatrices and variable block sizes, see MATNEST.
990 
991 .seealso: `MATBLOCKMAT`, `MatCreateNest()`
992 @*/
993 PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
994 {
995   PetscFunctionBegin;
996   PetscCall(MatCreate(comm,A));
997   PetscCall(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
998   PetscCall(MatSetType(*A,MATBLOCKMAT));
999   PetscCall(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
1000   PetscFunctionReturn(0);
1001 }
1002