xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 5de52c6d2b8d6de382140bd9fae367e1f02f2f13)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>              /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h>    /* use the common AIJ data-structure */
8 
/* Matrix stored in CSR format where each "entry" is itself a small (bs x bs) Mat */
typedef struct {
  SEQAIJHEADER(Mat);   /* standard AIJ fields (i,j,a,ilen,imax,...) with Mat-valued entries */
  SEQBAIJHEADER;       /* blocked fields (mbs, bs2, ...) shared with SeqBAIJ */
  Mat *diags;          /* factored diagonal blocks, created lazily on first MatSOR call */

  Vec left,right,middle,workb;                 /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
16 
/*
   Block SSOR for the symmetric storage case: only the diagonal and upper
   triangular blocks of each block row are stored, the diagonal blocks are
   Cholesky factored on first use, and after each forward solve the right
   hand sides of later block rows are adjusted (in the work copy workb),
   as in MatSOR_SeqSBAIJ.

   Only omega == 1.0, fshift == 0.0, and non-Eisenstat sweeps are supported;
   the initial guess is always zeroed (VecSet(xx,0.0) below).

   NOTE(review): a->diags[i] is passed to MatCholeskyFactorSymbolic() straight
   after PetscMalloc1() without first being created (e.g. via MatGetFactor());
   confirm this path is actually exercised/valid.
   NOTE(review): workb's array is obtained with VecGetArrayRead() yet is
   modified through the "middle" vector during the RHS adjustment — the copy
   makes this safe for the caller's bb, but the read-only access is misleading.
*/
static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");
  /* the backward sweep relies on the RHS adjustments made by a preceding forward sweep */
  if ((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot do backward sweep without forward sweep");
  }

  /* lazily Cholesky-factor each diagonal block and create the RHS work vector */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,&info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb,&a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  /* copy right hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb,a->workb));
  PetscCall(VecGetArrayRead(a->workb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        /* skip the diagonal block (stored first in the row) */
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        /* left = sum of strictly-upper blocks times current solution pieces */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        /* left = b_i - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x_i = D_i^{-1} left, solved in place into the solution array */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));

        /* now adjust right hand side, see MatSOR_SeqSBAIJ */
        for (j=0; j<n; j++) {
          PetscCall(MatMultTranspose(v[j],right,left));
          PetscCall(VecPlaceArray(middle,b + idx[j]*bs));
          PetscCall(VecAXPY(middle,-1.0,left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));

      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          PetscCall(VecPlaceArray(right,x + idx[j]*bs));
          PetscCall(MatMultAdd(v[j],right,left,left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(a->workb,&b));
  PetscFunctionReturn(0);
}
121 
/*
   Block SOR for the general (non-symmetric storage) case: the full block rows
   are stored, the diagonal blocks are LU factored on first use, and off-diagonal
   blocks are skipped by comparing column index against the row (idx[j] != i).

   Only omega == 1.0, fshift == 0.0, and non-Eisenstat sweeps are supported;
   the initial guess is always zeroed (VecSet(xx,0.0) below).

   NOTE(review): a->diags[i] is passed to MatLUFactorSymbolic() straight after
   PetscMalloc1() without first being created (e.g. via MatGetFactor());
   confirm this path is actually exercised/valid.
*/
static PetscErrorCode MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt          n = A->cmap->n,i,mbs = n/A->rmap->bs,j,bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                row,col;
  MatFactorInfo     info;
  Vec               left = a->left,right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for omega not equal to 1.0");
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for fshift");

  /* lazily LU-factor each diagonal block */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs,&a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND,&row,&col));
      PetscCall(MatLUFactorSymbolic(a->diags[i],a->a[a->diag[i]],row,col,&info));
      PetscCall(MatLUFactorNumeric(a->diags[i],a->a[a->diag[i]],&info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx,0.0));
  PetscCall(VecGetArray(xx,&x));
  PetscCall(VecGetArrayRead(bb,&b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {

      for (i=0; i<mbs; i++) {
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        /* left = sum over off-diagonal blocks of A_ij * x_j */
        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        /* left = b_i - left */
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        /* x_i = D_i^{-1} left */
        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {

      for (i=mbs-1; i>=0; i--) {
        n   = a->i[i+1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left,0.0));
        for (j=0; j<n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right,x + idx[j]*bs));
            PetscCall(MatMultAdd(v[j],right,left,left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right,b + i*bs));
        PetscCall(VecAYPX(left,-1.0,right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right,x + i*bs));
        PetscCall(MatSolve(diag[i],left,right));
        PetscCall(VecResetArray(right));

      }
    }
  }
  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(bb,&b));
  PetscFunctionReturn(0);
}
215 
/*
   Insert/add scalar values: each global (row,col) pair is mapped to a block
   (brow,bcol) and a within-block position (ridx,cidx); the target block Mat
   is located (or created on demand) in the CSR-of-Mats structure, then the
   scalar is forwarded to MatSetValues() on that block.

   When A->symmetric only blocks with brow <= bcol are stored; lower ones
   are silently dropped.
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
  PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
  PetscInt       ridx,cidx;
  PetscBool      roworiented=a->roworiented;
  MatScalar      value;
  Mat            *ap,*aa = a->a;

  PetscFunctionBegin;
  for (k=0; k<m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row/bs;
    if (row < 0) continue;    /* negative rows are ignored (standard PETSc convention) */
    PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
    rp   = aj + ai[brow];     /* block-column indices of block row brow */
    ap   = aa + ai[brow];     /* block Mats of block row brow */
    rmax = imax[brow];        /* allocated slots in this block row */
    nrow = ailen[brow];       /* occupied slots in this block row */
    low  = 0;
    high = nrow;
    for (l=0; l<n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
      col = in[l]; bcol = col/bs;
      if (A->symmetric && brow > bcol) continue;   /* only upper triangular blocks stored */
      ridx = row % bs; cidx = col % bs;            /* position within the bs x bs block */
      if (roworiented) value = v[l + k*n];
      else value = v[k + l*m];

      /* reuse the previous search window when columns arrive in increasing order */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      /* binary search down to a window of <= 7, then linear scan */
      while (high-low > 7) {
        t = (low+high)/2;
        if (rp[t] > bcol) high = t;
        else              low  = t;
      }
      for (i=low; i<high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;   /* block already present */
      }
      if (nonew == 1) goto noinsert1;        /* MAT_NEW_NONZERO_LOCATIONS false: ignore */
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      /* may reallocate aa/aj and update rp/ap/rmax if the row is full */
      MatSeqXAIJReallocateAIJ(A,a->mbs,1,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,Mat);
      N = nrow++ - 1; high++;
      /* shift up all the later entries in this row */
      for (ii=N; ii>=i; ii--) {
        rp[ii+1] = rp[ii];
        ap[ii+1] = ap[ii];
      }
      if (N>=i) ap[i] = NULL;   /* freshly opened slot: block created below */
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
noinsert1:;
      /* create the block on first touch, then delegate the scalar insert */
      if (!*(ap+i)) {
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,NULL,ap+i));
      }
      PetscCall(MatSetValues(ap[i],1,&ridx,1,&cidx,&value,is));
      low  = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(0);
}
284 
285 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
286 {
287   Mat               tmpA;
288   PetscInt          i,j,m,n,bs = 1,ncols,*lens,currentcol,mbs,**ii,*ilens,nextcol,*llens,cnt = 0;
289   const PetscInt    *cols;
290   const PetscScalar *values;
291   PetscBool         flg = PETSC_FALSE,notdone;
292   Mat_SeqAIJ        *a;
293   Mat_BlockMat      *amat;
294 
295   PetscFunctionBegin;
296   /* force binary viewer to load .info file if it has not yet done so */
297   PetscCall(PetscViewerSetUp(viewer));
298   PetscCall(MatCreate(PETSC_COMM_SELF,&tmpA));
299   PetscCall(MatSetType(tmpA,MATSEQAIJ));
300   PetscCall(MatLoad_SeqAIJ(tmpA,viewer));
301 
302   PetscCall(MatGetLocalSize(tmpA,&m,&n));
303   PetscOptionsBegin(PETSC_COMM_SELF,NULL,"Options for loading BlockMat matrix 1","Mat");
304   PetscCall(PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL));
305   PetscCall(PetscOptionsBool("-matload_symmetric","Store the matrix as symmetric","MatLoad",flg,&flg,NULL));
306   PetscOptionsEnd();
307 
308   /* Determine number of nonzero blocks for each block row */
309   a    = (Mat_SeqAIJ*) tmpA->data;
310   mbs  = m/bs;
311   PetscCall(PetscMalloc3(mbs,&lens,bs,&ii,bs,&ilens));
312   PetscCall(PetscArrayzero(lens,mbs));
313 
314   for (i=0; i<mbs; i++) {
315     for (j=0; j<bs; j++) {
316       ii[j]    = a->j + a->i[i*bs + j];
317       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
318     }
319 
320     currentcol = -1;
321     while (PETSC_TRUE) {
322       notdone = PETSC_FALSE;
323       nextcol = 1000000000;
324       for (j=0; j<bs; j++) {
325         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) {
326           ii[j]++;
327           ilens[j]--;
328         }
329         if (ilens[j] > 0) {
330           notdone = PETSC_TRUE;
331           nextcol = PetscMin(nextcol,ii[j][0]/bs);
332         }
333       }
334       if (!notdone) break;
335       if (!flg || (nextcol >= i)) lens[i]++;
336       currentcol = nextcol;
337     }
338   }
339 
340   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
341     PetscCall(MatSetSizes(newmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
342   }
343   PetscCall(MatBlockMatSetPreallocation(newmat,bs,0,lens));
344   if (flg) {
345     PetscCall(MatSetOption(newmat,MAT_SYMMETRIC,PETSC_TRUE));
346   }
347   amat = (Mat_BlockMat*)(newmat)->data;
348 
349   /* preallocate the submatrices */
350   PetscCall(PetscMalloc1(bs,&llens));
351   for (i=0; i<mbs; i++) { /* loops for block rows */
352     for (j=0; j<bs; j++) {
353       ii[j]    = a->j + a->i[i*bs + j];
354       ilens[j] = a->i[i*bs + j + 1] - a->i[i*bs + j];
355     }
356 
357     currentcol = 1000000000;
358     for (j=0; j<bs; j++) { /* loop over rows in block finding first nonzero block */
359       if (ilens[j] > 0) {
360         currentcol = PetscMin(currentcol,ii[j][0]/bs);
361       }
362     }
363 
364     while (PETSC_TRUE) {  /* loops over blocks in block row */
365       notdone = PETSC_FALSE;
366       nextcol = 1000000000;
367       PetscCall(PetscArrayzero(llens,bs));
368       for (j=0; j<bs; j++) { /* loop over rows in block */
369         while ((ilens[j] > 0 && ii[j][0]/bs <= currentcol)) { /* loop over columns in row */
370           ii[j]++;
371           ilens[j]--;
372           llens[j]++;
373         }
374         if (ilens[j] > 0) {
375           notdone = PETSC_TRUE;
376           nextcol = PetscMin(nextcol,ii[j][0]/bs);
377         }
378       }
379       PetscCheck(cnt < amat->maxnz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of blocks found greater than expected %" PetscInt_FMT,cnt);
380       if (!flg || currentcol >= i) {
381         amat->j[cnt] = currentcol;
382         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,bs,bs,0,llens,amat->a+cnt++));
383       }
384 
385       if (!notdone) break;
386       currentcol = nextcol;
387     }
388     amat->ilen[i] = lens[i];
389   }
390 
391   PetscCall(PetscFree3(lens,ii,ilens));
392   PetscCall(PetscFree(llens));
393 
394   /* copy over the matrix, one row at a time */
395   for (i=0; i<m; i++) {
396     PetscCall(MatGetRow(tmpA,i,&ncols,&cols,&values));
397     PetscCall(MatSetValues(newmat,1,&i,ncols,cols,values,INSERT_VALUES));
398     PetscCall(MatRestoreRow(tmpA,i,&ncols,&cols,&values));
399   }
400   PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
401   PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
402   PetscFunctionReturn(0);
403 }
404 
405 static PetscErrorCode MatView_BlockMat(Mat A,PetscViewer viewer)
406 {
407   Mat_BlockMat      *a = (Mat_BlockMat*)A->data;
408   const char        *name;
409   PetscViewerFormat format;
410 
411   PetscFunctionBegin;
412   PetscCall(PetscObjectGetName((PetscObject)A,&name));
413   PetscCall(PetscViewerGetFormat(viewer,&format));
414   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
415     PetscCall(PetscViewerASCIIPrintf(viewer,"Nonzero block matrices = %" PetscInt_FMT " \n",a->nz));
416     if (A->symmetric) {
417       PetscCall(PetscViewerASCIIPrintf(viewer,"Only upper triangular part of symmetric matrix is stored\n"));
418     }
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
424 {
425   Mat_BlockMat   *bmat = (Mat_BlockMat*)mat->data;
426   PetscInt       i;
427 
428   PetscFunctionBegin;
429   PetscCall(VecDestroy(&bmat->right));
430   PetscCall(VecDestroy(&bmat->left));
431   PetscCall(VecDestroy(&bmat->middle));
432   PetscCall(VecDestroy(&bmat->workb));
433   if (bmat->diags) {
434     for (i=0; i<mat->rmap->n/mat->rmap->bs; i++) {
435       PetscCall(MatDestroy(&bmat->diags[i]));
436     }
437   }
438   if (bmat->a) {
439     for (i=0; i<bmat->nz; i++) {
440       PetscCall(MatDestroy(&bmat->a[i]));
441     }
442   }
443   PetscCall(MatSeqXAIJFreeAIJ(mat,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
444   PetscCall(PetscFree(mat->data));
445   PetscFunctionReturn(0);
446 }
447 
448 static PetscErrorCode MatMult_BlockMat(Mat A,Vec x,Vec y)
449 {
450   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
451   PetscScalar    *xx,*yy;
452   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
453   Mat            *aa;
454 
455   PetscFunctionBegin;
456   /*
457      Standard CSR multiply except each entry is a Mat
458   */
459   PetscCall(VecGetArray(x,&xx));
460 
461   PetscCall(VecSet(y,0.0));
462   PetscCall(VecGetArray(y,&yy));
463   aj   = bmat->j;
464   aa   = bmat->a;
465   ii   = bmat->i;
466   for (i=0; i<m; i++) {
467     jrow = ii[i];
468     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
469     n    = ii[i+1] - jrow;
470     for (j=0; j<n; j++) {
471       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
472       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
473       PetscCall(VecResetArray(bmat->right));
474       jrow++;
475     }
476     PetscCall(VecResetArray(bmat->left));
477   }
478   PetscCall(VecRestoreArray(x,&xx));
479   PetscCall(VecRestoreArray(y,&yy));
480   PetscFunctionReturn(0);
481 }
482 
483 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)
484 {
485   Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
486   PetscScalar    *xx,*yy;
487   PetscInt       *aj,i,*ii,jrow,m = A->rmap->n/A->rmap->bs,bs = A->rmap->bs,n,j;
488   Mat            *aa;
489 
490   PetscFunctionBegin;
491   /*
492      Standard CSR multiply except each entry is a Mat
493   */
494   PetscCall(VecGetArray(x,&xx));
495 
496   PetscCall(VecSet(y,0.0));
497   PetscCall(VecGetArray(y,&yy));
498   aj   = bmat->j;
499   aa   = bmat->a;
500   ii   = bmat->i;
501   for (i=0; i<m; i++) {
502     jrow = ii[i];
503     n    = ii[i+1] - jrow;
504     PetscCall(VecPlaceArray(bmat->left,yy + bs*i));
505     PetscCall(VecPlaceArray(bmat->middle,xx + bs*i));
506     /* if we ALWAYS required a diagonal entry then could remove this if test */
507     if (aj[jrow] == i) {
508       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));
509       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
510       PetscCall(VecResetArray(bmat->right));
511       jrow++;
512       n--;
513     }
514     for (j=0; j<n; j++) {
515       PetscCall(VecPlaceArray(bmat->right,xx + bs*aj[jrow]));            /* upper triangular part */
516       PetscCall(MatMultAdd(aa[jrow],bmat->right,bmat->left,bmat->left));
517       PetscCall(VecResetArray(bmat->right));
518 
519       PetscCall(VecPlaceArray(bmat->right,yy + bs*aj[jrow]));            /* lower triangular part */
520       PetscCall(MatMultTransposeAdd(aa[jrow],bmat->middle,bmat->right,bmat->right));
521       PetscCall(VecResetArray(bmat->right));
522       jrow++;
523     }
524     PetscCall(VecResetArray(bmat->left));
525     PetscCall(VecResetArray(bmat->middle));
526   }
527   PetscCall(VecRestoreArray(x,&xx));
528   PetscCall(VecRestoreArray(y,&yy));
529   PetscFunctionReturn(0);
530 }
531 
/* NOTE(review): unimplemented placeholder — silently returns without computing
   z = y + A*x; callers get an unchanged z with no error. Confirm whether this
   should SETERRQ(PETSC_ERR_SUP,...) or be implemented. */
static PetscErrorCode MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
537 
/* NOTE(review): unimplemented placeholder — silently returns without computing
   y = A^T*x. Confirm whether this should SETERRQ(PETSC_ERR_SUP,...) or be
   implemented. */
static PetscErrorCode MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
543 
/* NOTE(review): unimplemented placeholder — silently returns without computing
   z = y + A^T*x. Confirm whether this should SETERRQ(PETSC_ERR_SUP,...) or be
   implemented. */
static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
549 
550 /*
551      Adds diagonal pointers to sparse matrix structure.
552 */
553 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
554 {
555   Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
556   PetscInt       i,j,mbs = A->rmap->n/A->rmap->bs;
557 
558   PetscFunctionBegin;
559   if (!a->diag) {
560     PetscCall(PetscMalloc1(mbs,&a->diag));
561   }
562   for (i=0; i<mbs; i++) {
563     a->diag[i] = a->i[i+1];
564     for (j=a->i[i]; j<a->i[i+1]; j++) {
565       if (a->j[j] == i) {
566         a->diag[i] = j;
567         break;
568       }
569     }
570   }
571   PetscFunctionReturn(0);
572 }
573 
/*
   Extract a scalar submatrix by taking the (first,first) entry of every block:
   requires isrow == iscol and both to be a stride IS whose step equals the
   block size, so exactly one scalar row/column is selected per block.

   The result is SEQSBAIJ (bs=1) when A is symmetric, otherwise SEQAIJ.

   NOTE(review): C->data is cast to Mat_SeqAIJ* even in the SEQSBAIJ branch —
   this relies on the two data structures sharing their leading a/j/i/ilen
   layout; confirm that assumption still holds.
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  Mat_SeqAIJ     *c;
  PetscInt       i,k,first,step,lensi,nrows,ncols;
  PetscInt       *j_new,*i_new,*aj = a->j,*ailen = a->ilen;
  PetscScalar    *a_new;
  Mat            C,*aa = a->a;
  PetscBool      stride,equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow,iscol,&equal));
  PetscCheck(equal,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&stride));
  PetscCheck(stride,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol,&first,&step));
  PetscCheck(step == A->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow,&nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols,n_rows;
    C    = *B;
    PetscCall(MatGetSize(C,&n_rows,&n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
    PetscCall(MatSetSizes(C,nrows,ncols,PETSC_DETERMINE,PETSC_DETERMINE));
    if (A->symmetric) {
      PetscCall(MatSetType(C,MATSEQSBAIJ));
    } else {
      PetscCall(MatSetType(C,MATSEQAIJ));
    }
    /* block row lengths equal scalar row lengths of the extracted matrix */
    PetscCall(MatSeqAIJSetPreallocation(C,0,ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C,1,0,ailen));
  }
  c = (Mat_SeqAIJ*)C->data;

  /* loop over rows inserting into submatrix, writing CSR arrays directly */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i=0; i<nrows; i++) {
    lensi = ailen[i];
    for (k=0; k<lensi; k++) {
      *j_new++ = *aj++;   /* block column index becomes scalar column index */
      PetscCall(MatGetValue(*aa++,first,first,a_new++));
    }
    i_new[i+1] = i_new[i] + lensi;
    c->ilen[i] = lensi;
  }

  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  *B   = C;
  PetscFunctionReturn(0);
}
635 
/*
   Final assembly: squeeze the unused preallocated slots out of each block row
   (the classic AIJ in-place compaction driven by fshift), reset ilen/imax to
   the compacted lengths, assemble every block submatrix, and record the
   diagonal block locations.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)
{
  Mat_BlockMat   *a = (Mat_BlockMat*)A->data;
  PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
  PetscInt       m      = a->mbs,*ip,N,*ailen = a->ilen,rmax = 0;
  Mat            *aa    = a->a,*ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i=1; i<m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i-1] - ailen[i-1];
    rmax    = PetscMax(rmax,ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j=0; j<N; j++) {
        ip[j-fshift] = ip[j];
        ap[j-fshift] = ap[j];
      }
    }
    ai[i] = ai[i-1] + ailen[i-1];   /* row starts now reflect compacted layout */
  }
  if (m) {
    fshift += imax[m-1] - ailen[m-1];
    ai[m]   = ai[m-1] + ailen[m-1];
  }
  /* reset ilen and imax for each row */
  for (i=0; i<m; i++) {
    ailen[i] = imax[i] = ai[i+1] - ai[i];
  }
  a->nz = ai[m];
  /* every stored block must exist and be assembled before use */
  for (i=0; i<a->nz; i++) {
    PetscAssert(aa[i],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT,i,aj[i],a->nz);
    PetscCall(MatAssemblyBegin(aa[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i],MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n",m,A->cmap->n/A->cmap->bs,fshift,a->nz));
  PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs));
  PetscCall(PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",rmax));

  A->info.mallocs    += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(0);
}
687 
688 static PetscErrorCode MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)
689 {
690   PetscFunctionBegin;
691   if (opt == MAT_SYMMETRIC && flg) {
692     A->ops->sor  = MatSOR_BlockMat_Symmetric;
693     A->ops->mult = MatMult_BlockMat_Symmetric;
694   } else {
695     PetscCall(PetscInfo(A,"Unused matrix option %s\n",MatOptions[opt]));
696   }
697   PetscFunctionReturn(0);
698 }
699 
/* Function table for MATBLOCKMAT; slot positions follow struct _MatOps and
   must not be reordered. Unimplemented operations are NULL. */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                               /*  4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                               /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL
};
849 
850 /*@C
851    MatBlockMatSetPreallocation - For good matrix assembly performance
852    the user should preallocate the matrix storage by setting the parameter nz
853    (or the array nnz).  By setting these parameters accurately, performance
854    during matrix assembly can be increased by more than a factor of 50.
855 
856    Collective
857 
858    Input Parameters:
859 +  B - The matrix
860 .  bs - size of each block in matrix
861 .  nz - number of nonzeros per block row (same for all rows)
862 -  nnz - array containing the number of nonzeros in the various block rows
863          (possibly different for each row) or NULL
864 
865    Notes:
866      If nnz is given then nz is ignored
867 
   Specify the preallocated storage with nnz.  The current implementation
   requires block-row-by-block-row preallocation: passing nnz=NULL generates
   an error, so nz is used only for validation.  For large problems you MUST
   preallocate memory or you will get TERRIBLE performance, see the users'
   manual chapter on matrices.
872 
873    Level: intermediate
874 
875 .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
876 
877 @*/
PetscErrorCode  MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific routine composed on B (MatBlockMatSetPreallocation_BlockMat()
     for MATBLOCKMAT); PetscTryMethod() is a silent no-op for matrix types that do not provide it */
  PetscTryMethod(B,"MatBlockMatSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
  PetscFunctionReturn(0);
}
884 
/*
   MatBlockMatSetPreallocation_BlockMat - Type-specific preallocation for MATBLOCKMAT.

   Sets the block size on the row/column layouts, validates the per-block-row
   nonzero counts, creates the work vectors used by the products, and allocates
   the CSR-style index arrays in which each "entry" is a Mat (bmat->a).

   Despite the public manual page, the current implementation REQUIRES nnz;
   nnz == NULL generates a PETSC_ERR_SUP error below.
*/
static PetscErrorCode  MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt *nnz)
{
  Mat_BlockMat   *bmat = (Mat_BlockMat*)A->data;
  PetscInt       i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap,bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap,bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* the layout may have adjusted the block size; use the final value from here on */
  PetscCall(PetscLayoutGetBlockSize(A->rmap,&bs));

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
  /* validate each block-row count: nonnegative and no more than the number of block columns */
  if (nnz) {
    for (i=0; i<A->rmap->n/bs; i++) {
      PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n/bs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],A->cmap->n/bs);
    }
  }
  bmat->mbs = A->rmap->n/bs;   /* number of block rows */

  /* right/middle wrap caller-supplied arrays later (array is NULL here); left owns its storage */
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs,NULL,&bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,bs,&bmat->left));

  if (!bmat->imax) {
    /* NOTE(review): sized by point rows (A->rmap->n) although only mbs block-row
       entries are used below -- over-allocation; confirm this is intentional */
    PetscCall(PetscMalloc2(A->rmap->n,&bmat->imax,A->rmap->n,&bmat->ilen));
    PetscCall(PetscLogObjectMemory((PetscObject)A,2*A->rmap->n*sizeof(PetscInt)));
  }
  if (PetscLikely(nnz)) {
    /* recompute nz as the total number of blocks, overriding the scalar argument */
    nz = 0;
    for (i=0; i<A->rmap->n/A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz           += nnz[i];
    }
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen,bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A,(PetscScalar**)&bmat->a,&bmat->j,&bmat->i));
  /* NOTE(review): bmat->i gets A->rmap->n+1 entries but only mbs+1 are filled -- over-allocation, confirm */
  PetscCall(PetscMalloc3(nz,&bmat->a,nz,&bmat->j,A->rmap->n+1,&bmat->i));
  PetscCall(PetscLogObjectMemory((PetscObject)A,(A->rmap->n+1)*sizeof(PetscInt)+nz*(sizeof(PetscScalar)+sizeof(PetscInt))));
  /* build the block-row start offsets from the per-row capacities */
  bmat->i[0] = 0;
  for (i=1; i<bmat->mbs+1; i++) {
    bmat->i[i] = bmat->i[i-1] + bmat->imax[i-1];
  }
  bmat->singlemalloc = PETSC_TRUE;   /* a, j, i came from one PetscMalloc3 */
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;           /* no blocks inserted yet */
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
944 
945 /*MC
   MATBLOCKMAT - A matrix that is defined by a set of Mat objects that together represent a sparse block matrix
947                  consisting of (usually) sparse blocks.
948 
949   Level: advanced
950 
951 .seealso: `MatCreateBlockMat()`
952 
953 M*/
954 
/*
   MatCreate_BlockMat - Constructor registered for MATBLOCKMAT: attaches the
   zeroed type-specific data, installs the function table, and composes the
   preallocation routine.  Storage allocation itself is deferred to
   MatBlockMatSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat   *b;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(A,&b));
  A->data = (void*)b;
  PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));

  /* NOTE(review): marked assembled with no values set -- presumably so an empty
     matrix is immediately usable; confirm against the assembly routines */
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATBLOCKMAT));

  /* expose the type-specific preallocation for PetscTryMethod() dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatBlockMatSetPreallocation_C",MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(0);
}
971 
972 /*@C
973    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential Mat object
974 
975   Collective
976 
977    Input Parameters:
978 +  comm - MPI communicator
979 .  m - number of rows
980 .  n  - number of columns
981 .  bs - size of each submatrix
982 .  nz  - expected maximum number of nonzero blocks in row (use PETSC_DEFAULT if not known)
-  nnz - expected number of nonzeros per block row (the current implementation
         requires nnz; passing NULL generates an error)
984 
985    Output Parameter:
986 .  A - the matrix
987 
988    Level: intermediate
989 
990    Notes:
991     Matrices of this type are nominally-sparse matrices in which each "entry" is a Mat object.  Each Mat must
992    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
993 
994    For matrices containing parallel submatrices and variable block sizes, see MATNEST.
995 
996 .seealso: `MATBLOCKMAT`, `MatCreateNest()`
997 @*/
PetscErrorCode  MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  /* m,n are local sizes; global sizes are derived by PETSc via PETSC_DETERMINE */
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*A,MATBLOCKMAT));
  /* NOTE(review): the MATBLOCKMAT implementation errors when nnz is NULL,
     despite manual pages suggesting NULL is allowed -- confirm before relying on it */
  PetscCall(MatBlockMatSetPreallocation(*A,bs,nz,nnz));
  PetscFunctionReturn(0);
}
1007