xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */
8 
9 typedef struct {
10   SEQAIJHEADER(Mat);
11   SEQBAIJHEADER;
12   Mat *diags;
13 
14   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
15 } Mat_BlockMat;
16 
17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
18 {
19   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
20   PetscScalar       *x;
21   const Mat         *v;
22   const PetscScalar *b;
23   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
24   const PetscInt    *idx;
25   IS                 row, col;
26   MatFactorInfo      info;
27   Vec                left = a->left, right = a->right, middle = a->middle;
28   Mat               *diag;
29 
30   PetscFunctionBegin;
31   its = its * lits;
32   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
33   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
34   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
35   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
36   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
37 
38   if (!a->diags) {
39     PetscCall(PetscMalloc1(mbs, &a->diags));
40     PetscCall(MatFactorInfoInitialize(&info));
41     for (i = 0; i < mbs; i++) {
42       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
43       PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
44       PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
45       PetscCall(ISDestroy(&row));
46       PetscCall(ISDestroy(&col));
47     }
48     PetscCall(VecDuplicate(bb, &a->workb));
49   }
50   diag = a->diags;
51 
52   PetscCall(VecSet(xx, 0.0));
53   PetscCall(VecGetArray(xx, &x));
54   /* copy right hand side because it must be modified during iteration */
55   PetscCall(VecCopy(bb, a->workb));
56   PetscCall(VecGetArrayRead(a->workb, &b));
57 
58   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
59   while (its--) {
60     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
61       for (i = 0; i < mbs; i++) {
62         n   = a->i[i + 1] - a->i[i] - 1;
63         idx = a->j + a->i[i] + 1;
64         v   = a->a + a->i[i] + 1;
65 
66         PetscCall(VecSet(left, 0.0));
67         for (j = 0; j < n; j++) {
68           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
69           PetscCall(MatMultAdd(v[j], right, left, left));
70           PetscCall(VecResetArray(right));
71         }
72         PetscCall(VecPlaceArray(right, b + i * bs));
73         PetscCall(VecAYPX(left, -1.0, right));
74         PetscCall(VecResetArray(right));
75 
76         PetscCall(VecPlaceArray(right, x + i * bs));
77         PetscCall(MatSolve(diag[i], left, right));
78 
79         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
80         for (j = 0; j < n; j++) {
81           PetscCall(MatMultTranspose(v[j], right, left));
82           PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
83           PetscCall(VecAXPY(middle, -1.0, left));
84           PetscCall(VecResetArray(middle));
85         }
86         PetscCall(VecResetArray(right));
87       }
88     }
89     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
90       for (i = mbs - 1; i >= 0; i--) {
91         n   = a->i[i + 1] - a->i[i] - 1;
92         idx = a->j + a->i[i] + 1;
93         v   = a->a + a->i[i] + 1;
94 
95         PetscCall(VecSet(left, 0.0));
96         for (j = 0; j < n; j++) {
97           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
98           PetscCall(MatMultAdd(v[j], right, left, left));
99           PetscCall(VecResetArray(right));
100         }
101         PetscCall(VecPlaceArray(right, b + i * bs));
102         PetscCall(VecAYPX(left, -1.0, right));
103         PetscCall(VecResetArray(right));
104 
105         PetscCall(VecPlaceArray(right, x + i * bs));
106         PetscCall(MatSolve(diag[i], left, right));
107         PetscCall(VecResetArray(right));
108       }
109     }
110   }
111   PetscCall(VecRestoreArray(xx, &x));
112   PetscCall(VecRestoreArrayRead(a->workb, &b));
113   PetscFunctionReturn(0);
114 }
115 
116 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
117 {
118   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
119   PetscScalar       *x;
120   const Mat         *v;
121   const PetscScalar *b;
122   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
123   const PetscInt    *idx;
124   IS                 row, col;
125   MatFactorInfo      info;
126   Vec                left = a->left, right = a->right;
127   Mat               *diag;
128 
129   PetscFunctionBegin;
130   its = its * lits;
131   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
132   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
133   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
134   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
135 
136   if (!a->diags) {
137     PetscCall(PetscMalloc1(mbs, &a->diags));
138     PetscCall(MatFactorInfoInitialize(&info));
139     for (i = 0; i < mbs; i++) {
140       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
141       PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
142       PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
143       PetscCall(ISDestroy(&row));
144       PetscCall(ISDestroy(&col));
145     }
146   }
147   diag = a->diags;
148 
149   PetscCall(VecSet(xx, 0.0));
150   PetscCall(VecGetArray(xx, &x));
151   PetscCall(VecGetArrayRead(bb, &b));
152 
153   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
154   while (its--) {
155     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
156       for (i = 0; i < mbs; i++) {
157         n   = a->i[i + 1] - a->i[i];
158         idx = a->j + a->i[i];
159         v   = a->a + a->i[i];
160 
161         PetscCall(VecSet(left, 0.0));
162         for (j = 0; j < n; j++) {
163           if (idx[j] != i) {
164             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
165             PetscCall(MatMultAdd(v[j], right, left, left));
166             PetscCall(VecResetArray(right));
167           }
168         }
169         PetscCall(VecPlaceArray(right, b + i * bs));
170         PetscCall(VecAYPX(left, -1.0, right));
171         PetscCall(VecResetArray(right));
172 
173         PetscCall(VecPlaceArray(right, x + i * bs));
174         PetscCall(MatSolve(diag[i], left, right));
175         PetscCall(VecResetArray(right));
176       }
177     }
178     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
179       for (i = mbs - 1; i >= 0; i--) {
180         n   = a->i[i + 1] - a->i[i];
181         idx = a->j + a->i[i];
182         v   = a->a + a->i[i];
183 
184         PetscCall(VecSet(left, 0.0));
185         for (j = 0; j < n; j++) {
186           if (idx[j] != i) {
187             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
188             PetscCall(MatMultAdd(v[j], right, left, left));
189             PetscCall(VecResetArray(right));
190           }
191         }
192         PetscCall(VecPlaceArray(right, b + i * bs));
193         PetscCall(VecAYPX(left, -1.0, right));
194         PetscCall(VecResetArray(right));
195 
196         PetscCall(VecPlaceArray(right, x + i * bs));
197         PetscCall(MatSolve(diag[i], left, right));
198         PetscCall(VecResetArray(right));
199       }
200     }
201   }
202   PetscCall(VecRestoreArray(xx, &x));
203   PetscCall(VecRestoreArrayRead(bb, &b));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
208 {
209   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
210   PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
211   PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
212   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
213   PetscInt      ridx, cidx;
214   PetscBool     roworiented = a->roworiented;
215   MatScalar     value;
216   Mat          *ap, *aa = a->a;
217 
218   PetscFunctionBegin;
219   for (k = 0; k < m; k++) { /* loop over added rows */
220     row  = im[k];
221     brow = row / bs;
222     if (row < 0) continue;
223     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
224     rp   = aj + ai[brow];
225     ap   = aa + ai[brow];
226     rmax = imax[brow];
227     nrow = ailen[brow];
228     low  = 0;
229     high = nrow;
230     for (l = 0; l < n; l++) { /* loop over added columns */
231       if (in[l] < 0) continue;
232       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
233       col  = in[l];
234       bcol = col / bs;
235       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
236       ridx = row % bs;
237       cidx = col % bs;
238       if (roworiented) value = v[l + k * n];
239       else value = v[k + l * m];
240 
241       if (col <= lastcol) low = 0;
242       else high = nrow;
243       lastcol = col;
244       while (high - low > 7) {
245         t = (low + high) / 2;
246         if (rp[t] > bcol) high = t;
247         else low = t;
248       }
249       for (i = low; i < high; i++) {
250         if (rp[i] > bcol) break;
251         if (rp[i] == bcol) goto noinsert1;
252       }
253       if (nonew == 1) goto noinsert1;
254       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
255       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
256       N = nrow++ - 1;
257       high++;
258       /* shift up all the later entries in this row */
259       for (ii = N; ii >= i; ii--) {
260         rp[ii + 1] = rp[ii];
261         ap[ii + 1] = ap[ii];
262       }
263       if (N >= i) ap[i] = NULL;
264       rp[i] = bcol;
265       a->nz++;
266       A->nonzerostate++;
267     noinsert1:;
268       if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
269       PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
270       low = i;
271     }
272     ailen[brow] = nrow;
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
278 {
279   Mat                tmpA;
280   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
281   const PetscInt    *cols;
282   const PetscScalar *values;
283   PetscBool          flg = PETSC_FALSE, notdone;
284   Mat_SeqAIJ        *a;
285   Mat_BlockMat      *amat;
286 
287   PetscFunctionBegin;
288   /* force binary viewer to load .info file if it has not yet done so */
289   PetscCall(PetscViewerSetUp(viewer));
290   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
291   PetscCall(MatSetType(tmpA, MATSEQAIJ));
292   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
293 
294   PetscCall(MatGetLocalSize(tmpA, &m, &n));
295   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
296   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
297   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
298   PetscOptionsEnd();
299 
300   /* Determine number of nonzero blocks for each block row */
301   a   = (Mat_SeqAIJ *)tmpA->data;
302   mbs = m / bs;
303   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
304   PetscCall(PetscArrayzero(lens, mbs));
305 
306   for (i = 0; i < mbs; i++) {
307     for (j = 0; j < bs; j++) {
308       ii[j]    = a->j + a->i[i * bs + j];
309       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
310     }
311 
312     currentcol = -1;
313     while (PETSC_TRUE) {
314       notdone = PETSC_FALSE;
315       nextcol = 1000000000;
316       for (j = 0; j < bs; j++) {
317         while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) {
318           ii[j]++;
319           ilens[j]--;
320         }
321         if (ilens[j] > 0) {
322           notdone = PETSC_TRUE;
323           nextcol = PetscMin(nextcol, ii[j][0] / bs);
324         }
325       }
326       if (!notdone) break;
327       if (!flg || (nextcol >= i)) lens[i]++;
328       currentcol = nextcol;
329     }
330   }
331 
332   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
333   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
334   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
335   amat = (Mat_BlockMat *)(newmat)->data;
336 
337   /* preallocate the submatrices */
338   PetscCall(PetscMalloc1(bs, &llens));
339   for (i = 0; i < mbs; i++) { /* loops for block rows */
340     for (j = 0; j < bs; j++) {
341       ii[j]    = a->j + a->i[i * bs + j];
342       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
343     }
344 
345     currentcol = 1000000000;
346     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
347       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
348     }
349 
350     while (PETSC_TRUE) { /* loops over blocks in block row */
351       notdone = PETSC_FALSE;
352       nextcol = 1000000000;
353       PetscCall(PetscArrayzero(llens, bs));
354       for (j = 0; j < bs; j++) {                                /* loop over rows in block */
355         while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { /* loop over columns in row */
356           ii[j]++;
357           ilens[j]--;
358           llens[j]++;
359         }
360         if (ilens[j] > 0) {
361           notdone = PETSC_TRUE;
362           nextcol = PetscMin(nextcol, ii[j][0] / bs);
363         }
364       }
365       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
366       if (!flg || currentcol >= i) {
367         amat->j[cnt] = currentcol;
368         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
369       }
370 
371       if (!notdone) break;
372       currentcol = nextcol;
373     }
374     amat->ilen[i] = lens[i];
375   }
376 
377   PetscCall(PetscFree3(lens, ii, ilens));
378   PetscCall(PetscFree(llens));
379 
380   /* copy over the matrix, one row at a time */
381   for (i = 0; i < m; i++) {
382     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
383     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
384     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
385   }
386   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
387   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
388   PetscFunctionReturn(0);
389 }
390 
391 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
392 {
393   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
394   const char       *name;
395   PetscViewerFormat format;
396 
397   PetscFunctionBegin;
398   PetscCall(PetscObjectGetName((PetscObject)A, &name));
399   PetscCall(PetscViewerGetFormat(viewer, &format));
400   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
401     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
402     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
408 {
409   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
410   PetscInt      i;
411 
412   PetscFunctionBegin;
413   PetscCall(VecDestroy(&bmat->right));
414   PetscCall(VecDestroy(&bmat->left));
415   PetscCall(VecDestroy(&bmat->middle));
416   PetscCall(VecDestroy(&bmat->workb));
417   if (bmat->diags) {
418     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
419   }
420   if (bmat->a) {
421     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
422   }
423   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
424   PetscCall(PetscFree(mat->data));
425   PetscFunctionReturn(0);
426 }
427 
428 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
429 {
430   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
431   PetscScalar  *xx, *yy;
432   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
433   Mat          *aa;
434 
435   PetscFunctionBegin;
436   /*
437      Standard CSR multiply except each entry is a Mat
438   */
439   PetscCall(VecGetArray(x, &xx));
440 
441   PetscCall(VecSet(y, 0.0));
442   PetscCall(VecGetArray(y, &yy));
443   aj = bmat->j;
444   aa = bmat->a;
445   ii = bmat->i;
446   for (i = 0; i < m; i++) {
447     jrow = ii[i];
448     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
449     n = ii[i + 1] - jrow;
450     for (j = 0; j < n; j++) {
451       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
452       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
453       PetscCall(VecResetArray(bmat->right));
454       jrow++;
455     }
456     PetscCall(VecResetArray(bmat->left));
457   }
458   PetscCall(VecRestoreArray(x, &xx));
459   PetscCall(VecRestoreArray(y, &yy));
460   PetscFunctionReturn(0);
461 }
462 
463 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
464 {
465   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
466   PetscScalar  *xx, *yy;
467   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
468   Mat          *aa;
469 
470   PetscFunctionBegin;
471   /*
472      Standard CSR multiply except each entry is a Mat
473   */
474   PetscCall(VecGetArray(x, &xx));
475 
476   PetscCall(VecSet(y, 0.0));
477   PetscCall(VecGetArray(y, &yy));
478   aj = bmat->j;
479   aa = bmat->a;
480   ii = bmat->i;
481   for (i = 0; i < m; i++) {
482     jrow = ii[i];
483     n    = ii[i + 1] - jrow;
484     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
485     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
486     /* if we ALWAYS required a diagonal entry then could remove this if test */
487     if (aj[jrow] == i) {
488       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
489       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
490       PetscCall(VecResetArray(bmat->right));
491       jrow++;
492       n--;
493     }
494     for (j = 0; j < n; j++) {
495       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
496       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
497       PetscCall(VecResetArray(bmat->right));
498 
499       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
500       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
501       PetscCall(VecResetArray(bmat->right));
502       jrow++;
503     }
504     PetscCall(VecResetArray(bmat->left));
505     PetscCall(VecResetArray(bmat->middle));
506   }
507   PetscCall(VecRestoreArray(x, &xx));
508   PetscCall(VecRestoreArray(y, &yy));
509   PetscFunctionReturn(0);
510 }
511 
512 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
513 {
514   PetscFunctionBegin;
515   PetscFunctionReturn(0);
516 }
517 
518 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
519 {
520   PetscFunctionBegin;
521   PetscFunctionReturn(0);
522 }
523 
524 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
525 {
526   PetscFunctionBegin;
527   PetscFunctionReturn(0);
528 }
529 
530 /*
531      Adds diagonal pointers to sparse matrix structure.
532 */
533 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
534 {
535   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
536   PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;
537 
538   PetscFunctionBegin;
539   if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
540   for (i = 0; i < mbs; i++) {
541     a->diag[i] = a->i[i + 1];
542     for (j = a->i[i]; j < a->i[i + 1]; j++) {
543       if (a->j[j] == i) {
544         a->diag[i] = j;
545         break;
546       }
547     }
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
553 {
554   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
555   Mat_SeqAIJ   *c;
556   PetscInt      i, k, first, step, lensi, nrows, ncols;
557   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
558   PetscScalar  *a_new;
559   Mat           C, *aa = a->a;
560   PetscBool     stride, equal;
561 
562   PetscFunctionBegin;
563   PetscCall(ISEqual(isrow, iscol, &equal));
564   PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
565   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
566   PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
567   PetscCall(ISStrideGetInfo(iscol, &first, &step));
568   PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");
569 
570   PetscCall(ISGetLocalSize(isrow, &nrows));
571   ncols = nrows;
572 
573   /* create submatrix */
574   if (scall == MAT_REUSE_MATRIX) {
575     PetscInt n_cols, n_rows;
576     C = *B;
577     PetscCall(MatGetSize(C, &n_rows, &n_cols));
578     PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
579     PetscCall(MatZeroEntries(C));
580   } else {
581     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
582     PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
583     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
584     else PetscCall(MatSetType(C, MATSEQAIJ));
585     PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
586     PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
587   }
588   c = (Mat_SeqAIJ *)C->data;
589 
590   /* loop over rows inserting into submatrix */
591   a_new = c->a;
592   j_new = c->j;
593   i_new = c->i;
594 
595   for (i = 0; i < nrows; i++) {
596     lensi = ailen[i];
597     for (k = 0; k < lensi; k++) {
598       *j_new++ = *aj++;
599       PetscCall(MatGetValue(*aa++, first, first, a_new++));
600     }
601     i_new[i + 1] = i_new[i] + lensi;
602     c->ilen[i]   = lensi;
603   }
604 
605   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
606   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
607   *B = C;
608   PetscFunctionReturn(0);
609 }
610 
611 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
612 {
613   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
614   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
615   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
616   Mat          *aa = a->a, *ap;
617 
618   PetscFunctionBegin;
619   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
620 
621   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
622   for (i = 1; i < m; i++) {
623     /* move each row back by the amount of empty slots (fshift) before it*/
624     fshift += imax[i - 1] - ailen[i - 1];
625     rmax = PetscMax(rmax, ailen[i]);
626     if (fshift) {
627       ip = aj + ai[i];
628       ap = aa + ai[i];
629       N  = ailen[i];
630       for (j = 0; j < N; j++) {
631         ip[j - fshift] = ip[j];
632         ap[j - fshift] = ap[j];
633       }
634     }
635     ai[i] = ai[i - 1] + ailen[i - 1];
636   }
637   if (m) {
638     fshift += imax[m - 1] - ailen[m - 1];
639     ai[m] = ai[m - 1] + ailen[m - 1];
640   }
641   /* reset ilen and imax for each row */
642   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
643   a->nz = ai[m];
644   for (i = 0; i < a->nz; i++) {
645     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
646     PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
647     PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
648   }
649   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
650   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
651   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));
652 
653   A->info.mallocs += a->reallocs;
654   a->reallocs         = 0;
655   A->info.nz_unneeded = (double)fshift;
656   a->rmax             = rmax;
657   PetscCall(MatMarkDiagonal_BlockMat(A));
658   PetscFunctionReturn(0);
659 }
660 
661 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
662 {
663   PetscFunctionBegin;
664   if (opt == MAT_SYMMETRIC && flg) {
665     A->ops->sor  = MatSOR_BlockMat_Symmetric;
666     A->ops->mult = MatMult_BlockMat_Symmetric;
667   } else {
668     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
669   }
670   PetscFunctionReturn(0);
671 }
672 
673 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
674                                        NULL,
675                                        NULL,
676                                        MatMult_BlockMat,
677                                        /*  4*/ MatMultAdd_BlockMat,
678                                        MatMultTranspose_BlockMat,
679                                        MatMultTransposeAdd_BlockMat,
680                                        NULL,
681                                        NULL,
682                                        NULL,
683                                        /* 10*/ NULL,
684                                        NULL,
685                                        NULL,
686                                        MatSOR_BlockMat,
687                                        NULL,
688                                        /* 15*/ NULL,
689                                        NULL,
690                                        NULL,
691                                        NULL,
692                                        NULL,
693                                        /* 20*/ NULL,
694                                        MatAssemblyEnd_BlockMat,
695                                        MatSetOption_BlockMat,
696                                        NULL,
697                                        /* 24*/ NULL,
698                                        NULL,
699                                        NULL,
700                                        NULL,
701                                        NULL,
702                                        /* 29*/ NULL,
703                                        NULL,
704                                        NULL,
705                                        NULL,
706                                        NULL,
707                                        /* 34*/ NULL,
708                                        NULL,
709                                        NULL,
710                                        NULL,
711                                        NULL,
712                                        /* 39*/ NULL,
713                                        NULL,
714                                        NULL,
715                                        NULL,
716                                        NULL,
717                                        /* 44*/ NULL,
718                                        NULL,
719                                        MatShift_Basic,
720                                        NULL,
721                                        NULL,
722                                        /* 49*/ NULL,
723                                        NULL,
724                                        NULL,
725                                        NULL,
726                                        NULL,
727                                        /* 54*/ NULL,
728                                        NULL,
729                                        NULL,
730                                        NULL,
731                                        NULL,
732                                        /* 59*/ MatCreateSubMatrix_BlockMat,
733                                        MatDestroy_BlockMat,
734                                        MatView_BlockMat,
735                                        NULL,
736                                        NULL,
737                                        /* 64*/ NULL,
738                                        NULL,
739                                        NULL,
740                                        NULL,
741                                        NULL,
742                                        /* 69*/ NULL,
743                                        NULL,
744                                        NULL,
745                                        NULL,
746                                        NULL,
747                                        /* 74*/ NULL,
748                                        NULL,
749                                        NULL,
750                                        NULL,
751                                        NULL,
752                                        /* 79*/ NULL,
753                                        NULL,
754                                        NULL,
755                                        NULL,
756                                        MatLoad_BlockMat,
757                                        /* 84*/ NULL,
758                                        NULL,
759                                        NULL,
760                                        NULL,
761                                        NULL,
762                                        /* 89*/ NULL,
763                                        NULL,
764                                        NULL,
765                                        NULL,
766                                        NULL,
767                                        /* 94*/ NULL,
768                                        NULL,
769                                        NULL,
770                                        NULL,
771                                        NULL,
772                                        /* 99*/ NULL,
773                                        NULL,
774                                        NULL,
775                                        NULL,
776                                        NULL,
777                                        /*104*/ NULL,
778                                        NULL,
779                                        NULL,
780                                        NULL,
781                                        NULL,
782                                        /*109*/ NULL,
783                                        NULL,
784                                        NULL,
785                                        NULL,
786                                        NULL,
787                                        /*114*/ NULL,
788                                        NULL,
789                                        NULL,
790                                        NULL,
791                                        NULL,
792                                        /*119*/ NULL,
793                                        NULL,
794                                        NULL,
795                                        NULL,
796                                        NULL,
797                                        /*124*/ NULL,
798                                        NULL,
799                                        NULL,
800                                        NULL,
801                                        NULL,
802                                        /*129*/ NULL,
803                                        NULL,
804                                        NULL,
805                                        NULL,
806                                        NULL,
807                                        /*134*/ NULL,
808                                        NULL,
809                                        NULL,
810                                        NULL,
811                                        NULL,
812                                        /*139*/ NULL,
813                                        NULL,
814                                        NULL,
815                                        NULL,
816                                        NULL,
817                                        /*144*/ NULL,
818                                        NULL,
819                                        NULL,
820                                        NULL,
821                                        NULL,
822                                        NULL,
823                                        /*150*/ NULL};
824 
825 /*@C
826    MatBlockMatSetPreallocation - For good matrix assembly performance
827    the user should preallocate the matrix storage by setting the parameter nz
828    (or the array nnz).  By setting these parameters accurately, performance
829    during matrix assembly can be increased by more than a factor of 50.
830 
831    Collective
832 
833    Input Parameters:
834 +  B - The matrix
835 .  bs - size of each block in matrix
836 .  nz - number of nonzeros per block row (same for all rows)
837 -  nnz - array containing the number of nonzeros in the various block rows
838          (possibly different for each row) or NULL
839 
840    Notes:
841      If nnz is given then nz is ignored
842 
843    Specify the preallocated storage with either nz or nnz (not both).
844    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
845    allocation.  For large problems you MUST preallocate memory or you
846    will get TERRIBLE performance, see the users' manual chapter on matrices.
847 
848    Level: intermediate
849 
850 .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
851 @*/
852 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
853 {
854   PetscFunctionBegin;
855   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
856   PetscFunctionReturn(0);
857 }
858 
859 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
860 {
861   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
862   PetscInt      i;
863 
864   PetscFunctionBegin;
865   PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
866   PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
867   PetscCall(PetscLayoutSetUp(A->rmap));
868   PetscCall(PetscLayoutSetUp(A->cmap));
869   PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));
870 
871   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
872   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
873   if (nnz) {
874     for (i = 0; i < A->rmap->n / bs; i++) {
875       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
876       PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
877     }
878   }
879   bmat->mbs = A->rmap->n / bs;
880 
881   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
882   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
883   PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));
884 
885   if (!bmat->imax) { PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen)); }
886   if (PetscLikely(nnz)) {
887     nz = 0;
888     for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
889       bmat->imax[i] = nnz[i];
890       nz += nnz[i];
891     }
892   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
893 
894   /* bmat->ilen will count nonzeros in each row so far. */
895   PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));
896 
897   /* allocate the matrix space */
898   PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
899   PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
900   bmat->i[0] = 0;
901   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
902   bmat->singlemalloc = PETSC_TRUE;
903   bmat->free_a       = PETSC_TRUE;
904   bmat->free_ij      = PETSC_TRUE;
905 
906   bmat->nz            = 0;
907   bmat->maxnz         = nz;
908   A->info.nz_unneeded = (double)bmat->maxnz;
909   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
910   PetscFunctionReturn(0);
911 }
912 
913 /*MC
914    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
915                  consisting of (usually) sparse blocks.
916 
917   Level: advanced
918 
919 .seealso: `MatCreateBlockMat()`
920 M*/
921 
922 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
923 {
924   Mat_BlockMat *b;
925 
926   PetscFunctionBegin;
927   PetscCall(PetscNew(&b));
928   A->data = (void *)b;
929   PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
930 
931   A->assembled    = PETSC_TRUE;
932   A->preallocated = PETSC_FALSE;
933   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
934 
935   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
936   PetscFunctionReturn(0);
937 }
938 
939 /*@C
940    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object
941 
942   Collective
943 
944    Input Parameters:
945 +  comm - MPI communicator
946 .  m - number of rows
947 .  n  - number of columns
948 .  bs - size of each submatrix
949 .  nz  - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
950 -  nnz - expected number of nonzers per block row if known (use NULL otherwise)
951 
952    Output Parameter:
953 .  A - the matrix
954 
955    Level: intermediate
956 
957    Notes:
958     Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
959    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
960 
961    For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
962 
963    Developer Note:
964    I don't like the name, it is not `MATNESTMAT`
965 
966 .seealso: `MATBLOCKMAT`, `MatCreateNest()`
967 @*/
968 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
969 {
970   PetscFunctionBegin;
971   PetscCall(MatCreate(comm, A));
972   PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
973   PetscCall(MatSetType(*A, MATBLOCKMAT));
974   PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
975   PetscFunctionReturn(0);
976 }
977