xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3    This provides a matrix that consists of Mats
4 */
5 
6 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
7 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */
8 
9 typedef struct {
10   SEQAIJHEADER(Mat);
11   SEQBAIJHEADER;
12   Mat *diags;
13 
14   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
15 } Mat_BlockMat;
16 
17 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
18   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
19   PetscScalar       *x;
20   const Mat         *v;
21   const PetscScalar *b;
22   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
23   const PetscInt    *idx;
24   IS                 row, col;
25   MatFactorInfo      info;
26   Vec                left = a->left, right = a->right, middle = a->middle;
27   Mat               *diag;
28 
29   PetscFunctionBegin;
30   its = its * lits;
31   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
32   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
33   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
34   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
35   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
36 
37   if (!a->diags) {
38     PetscCall(PetscMalloc1(mbs, &a->diags));
39     PetscCall(MatFactorInfoInitialize(&info));
40     for (i = 0; i < mbs; i++) {
41       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
42       PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
43       PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
44       PetscCall(ISDestroy(&row));
45       PetscCall(ISDestroy(&col));
46     }
47     PetscCall(VecDuplicate(bb, &a->workb));
48   }
49   diag = a->diags;
50 
51   PetscCall(VecSet(xx, 0.0));
52   PetscCall(VecGetArray(xx, &x));
53   /* copy right hand side because it must be modified during iteration */
54   PetscCall(VecCopy(bb, a->workb));
55   PetscCall(VecGetArrayRead(a->workb, &b));
56 
57   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
58   while (its--) {
59     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
60       for (i = 0; i < mbs; i++) {
61         n   = a->i[i + 1] - a->i[i] - 1;
62         idx = a->j + a->i[i] + 1;
63         v   = a->a + a->i[i] + 1;
64 
65         PetscCall(VecSet(left, 0.0));
66         for (j = 0; j < n; j++) {
67           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
68           PetscCall(MatMultAdd(v[j], right, left, left));
69           PetscCall(VecResetArray(right));
70         }
71         PetscCall(VecPlaceArray(right, b + i * bs));
72         PetscCall(VecAYPX(left, -1.0, right));
73         PetscCall(VecResetArray(right));
74 
75         PetscCall(VecPlaceArray(right, x + i * bs));
76         PetscCall(MatSolve(diag[i], left, right));
77 
78         /* now adjust right hand side, see MatSOR_SeqSBAIJ */
79         for (j = 0; j < n; j++) {
80           PetscCall(MatMultTranspose(v[j], right, left));
81           PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
82           PetscCall(VecAXPY(middle, -1.0, left));
83           PetscCall(VecResetArray(middle));
84         }
85         PetscCall(VecResetArray(right));
86       }
87     }
88     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
89       for (i = mbs - 1; i >= 0; i--) {
90         n   = a->i[i + 1] - a->i[i] - 1;
91         idx = a->j + a->i[i] + 1;
92         v   = a->a + a->i[i] + 1;
93 
94         PetscCall(VecSet(left, 0.0));
95         for (j = 0; j < n; j++) {
96           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
97           PetscCall(MatMultAdd(v[j], right, left, left));
98           PetscCall(VecResetArray(right));
99         }
100         PetscCall(VecPlaceArray(right, b + i * bs));
101         PetscCall(VecAYPX(left, -1.0, right));
102         PetscCall(VecResetArray(right));
103 
104         PetscCall(VecPlaceArray(right, x + i * bs));
105         PetscCall(MatSolve(diag[i], left, right));
106         PetscCall(VecResetArray(right));
107       }
108     }
109   }
110   PetscCall(VecRestoreArray(xx, &x));
111   PetscCall(VecRestoreArrayRead(a->workb, &b));
112   PetscFunctionReturn(0);
113 }
114 
115 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
116   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
117   PetscScalar       *x;
118   const Mat         *v;
119   const PetscScalar *b;
120   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
121   const PetscInt    *idx;
122   IS                 row, col;
123   MatFactorInfo      info;
124   Vec                left = a->left, right = a->right;
125   Mat               *diag;
126 
127   PetscFunctionBegin;
128   its = its * lits;
129   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
130   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
131   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
132   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
133 
134   if (!a->diags) {
135     PetscCall(PetscMalloc1(mbs, &a->diags));
136     PetscCall(MatFactorInfoInitialize(&info));
137     for (i = 0; i < mbs; i++) {
138       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
139       PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
140       PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
141       PetscCall(ISDestroy(&row));
142       PetscCall(ISDestroy(&col));
143     }
144   }
145   diag = a->diags;
146 
147   PetscCall(VecSet(xx, 0.0));
148   PetscCall(VecGetArray(xx, &x));
149   PetscCall(VecGetArrayRead(bb, &b));
150 
151   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
152   while (its--) {
153     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
154       for (i = 0; i < mbs; i++) {
155         n   = a->i[i + 1] - a->i[i];
156         idx = a->j + a->i[i];
157         v   = a->a + a->i[i];
158 
159         PetscCall(VecSet(left, 0.0));
160         for (j = 0; j < n; j++) {
161           if (idx[j] != i) {
162             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
163             PetscCall(MatMultAdd(v[j], right, left, left));
164             PetscCall(VecResetArray(right));
165           }
166         }
167         PetscCall(VecPlaceArray(right, b + i * bs));
168         PetscCall(VecAYPX(left, -1.0, right));
169         PetscCall(VecResetArray(right));
170 
171         PetscCall(VecPlaceArray(right, x + i * bs));
172         PetscCall(MatSolve(diag[i], left, right));
173         PetscCall(VecResetArray(right));
174       }
175     }
176     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
177       for (i = mbs - 1; i >= 0; i--) {
178         n   = a->i[i + 1] - a->i[i];
179         idx = a->j + a->i[i];
180         v   = a->a + a->i[i];
181 
182         PetscCall(VecSet(left, 0.0));
183         for (j = 0; j < n; j++) {
184           if (idx[j] != i) {
185             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
186             PetscCall(MatMultAdd(v[j], right, left, left));
187             PetscCall(VecResetArray(right));
188           }
189         }
190         PetscCall(VecPlaceArray(right, b + i * bs));
191         PetscCall(VecAYPX(left, -1.0, right));
192         PetscCall(VecResetArray(right));
193 
194         PetscCall(VecPlaceArray(right, x + i * bs));
195         PetscCall(MatSolve(diag[i], left, right));
196         PetscCall(VecResetArray(right));
197       }
198     }
199   }
200   PetscCall(VecRestoreArray(xx, &x));
201   PetscCall(VecRestoreArrayRead(bb, &b));
202   PetscFunctionReturn(0);
203 }
204 
205 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
206   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
207   PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
208   PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
209   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
210   PetscInt      ridx, cidx;
211   PetscBool     roworiented = a->roworiented;
212   MatScalar     value;
213   Mat          *ap, *aa = a->a;
214 
215   PetscFunctionBegin;
216   for (k = 0; k < m; k++) { /* loop over added rows */
217     row  = im[k];
218     brow = row / bs;
219     if (row < 0) continue;
220     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
221     rp   = aj + ai[brow];
222     ap   = aa + ai[brow];
223     rmax = imax[brow];
224     nrow = ailen[brow];
225     low  = 0;
226     high = nrow;
227     for (l = 0; l < n; l++) { /* loop over added columns */
228       if (in[l] < 0) continue;
229       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
230       col  = in[l];
231       bcol = col / bs;
232       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
233       ridx = row % bs;
234       cidx = col % bs;
235       if (roworiented) value = v[l + k * n];
236       else value = v[k + l * m];
237 
238       if (col <= lastcol) low = 0;
239       else high = nrow;
240       lastcol = col;
241       while (high - low > 7) {
242         t = (low + high) / 2;
243         if (rp[t] > bcol) high = t;
244         else low = t;
245       }
246       for (i = low; i < high; i++) {
247         if (rp[i] > bcol) break;
248         if (rp[i] == bcol) goto noinsert1;
249       }
250       if (nonew == 1) goto noinsert1;
251       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
252       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
253       N = nrow++ - 1;
254       high++;
255       /* shift up all the later entries in this row */
256       for (ii = N; ii >= i; ii--) {
257         rp[ii + 1] = rp[ii];
258         ap[ii + 1] = ap[ii];
259       }
260       if (N >= i) ap[i] = NULL;
261       rp[i] = bcol;
262       a->nz++;
263       A->nonzerostate++;
264     noinsert1:;
265       if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
266       PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
267       low = i;
268     }
269     ailen[brow] = nrow;
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer) {
275   Mat                tmpA;
276   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
277   const PetscInt    *cols;
278   const PetscScalar *values;
279   PetscBool          flg = PETSC_FALSE, notdone;
280   Mat_SeqAIJ        *a;
281   Mat_BlockMat      *amat;
282 
283   PetscFunctionBegin;
284   /* force binary viewer to load .info file if it has not yet done so */
285   PetscCall(PetscViewerSetUp(viewer));
286   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
287   PetscCall(MatSetType(tmpA, MATSEQAIJ));
288   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
289 
290   PetscCall(MatGetLocalSize(tmpA, &m, &n));
291   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
292   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
293   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
294   PetscOptionsEnd();
295 
296   /* Determine number of nonzero blocks for each block row */
297   a   = (Mat_SeqAIJ *)tmpA->data;
298   mbs = m / bs;
299   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
300   PetscCall(PetscArrayzero(lens, mbs));
301 
302   for (i = 0; i < mbs; i++) {
303     for (j = 0; j < bs; j++) {
304       ii[j]    = a->j + a->i[i * bs + j];
305       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
306     }
307 
308     currentcol = -1;
309     while (PETSC_TRUE) {
310       notdone = PETSC_FALSE;
311       nextcol = 1000000000;
312       for (j = 0; j < bs; j++) {
313         while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) {
314           ii[j]++;
315           ilens[j]--;
316         }
317         if (ilens[j] > 0) {
318           notdone = PETSC_TRUE;
319           nextcol = PetscMin(nextcol, ii[j][0] / bs);
320         }
321       }
322       if (!notdone) break;
323       if (!flg || (nextcol >= i)) lens[i]++;
324       currentcol = nextcol;
325     }
326   }
327 
328   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
329   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
330   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
331   amat = (Mat_BlockMat *)(newmat)->data;
332 
333   /* preallocate the submatrices */
334   PetscCall(PetscMalloc1(bs, &llens));
335   for (i = 0; i < mbs; i++) { /* loops for block rows */
336     for (j = 0; j < bs; j++) {
337       ii[j]    = a->j + a->i[i * bs + j];
338       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
339     }
340 
341     currentcol = 1000000000;
342     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
343       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
344     }
345 
346     while (PETSC_TRUE) { /* loops over blocks in block row */
347       notdone = PETSC_FALSE;
348       nextcol = 1000000000;
349       PetscCall(PetscArrayzero(llens, bs));
350       for (j = 0; j < bs; j++) {                                /* loop over rows in block */
351         while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { /* loop over columns in row */
352           ii[j]++;
353           ilens[j]--;
354           llens[j]++;
355         }
356         if (ilens[j] > 0) {
357           notdone = PETSC_TRUE;
358           nextcol = PetscMin(nextcol, ii[j][0] / bs);
359         }
360       }
361       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
362       if (!flg || currentcol >= i) {
363         amat->j[cnt] = currentcol;
364         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
365       }
366 
367       if (!notdone) break;
368       currentcol = nextcol;
369     }
370     amat->ilen[i] = lens[i];
371   }
372 
373   PetscCall(PetscFree3(lens, ii, ilens));
374   PetscCall(PetscFree(llens));
375 
376   /* copy over the matrix, one row at a time */
377   for (i = 0; i < m; i++) {
378     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
379     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
380     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
381   }
382   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
383   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
384   PetscFunctionReturn(0);
385 }
386 
387 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer) {
388   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
389   const char       *name;
390   PetscViewerFormat format;
391 
392   PetscFunctionBegin;
393   PetscCall(PetscObjectGetName((PetscObject)A, &name));
394   PetscCall(PetscViewerGetFormat(viewer, &format));
395   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
396     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
397     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
398   }
399   PetscFunctionReturn(0);
400 }
401 
402 static PetscErrorCode MatDestroy_BlockMat(Mat mat) {
403   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
404   PetscInt      i;
405 
406   PetscFunctionBegin;
407   PetscCall(VecDestroy(&bmat->right));
408   PetscCall(VecDestroy(&bmat->left));
409   PetscCall(VecDestroy(&bmat->middle));
410   PetscCall(VecDestroy(&bmat->workb));
411   if (bmat->diags) {
412     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
413   }
414   if (bmat->a) {
415     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
416   }
417   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
418   PetscCall(PetscFree(mat->data));
419   PetscFunctionReturn(0);
420 }
421 
422 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y) {
423   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
424   PetscScalar  *xx, *yy;
425   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
426   Mat          *aa;
427 
428   PetscFunctionBegin;
429   /*
430      Standard CSR multiply except each entry is a Mat
431   */
432   PetscCall(VecGetArray(x, &xx));
433 
434   PetscCall(VecSet(y, 0.0));
435   PetscCall(VecGetArray(y, &yy));
436   aj = bmat->j;
437   aa = bmat->a;
438   ii = bmat->i;
439   for (i = 0; i < m; i++) {
440     jrow = ii[i];
441     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
442     n = ii[i + 1] - jrow;
443     for (j = 0; j < n; j++) {
444       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
445       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
446       PetscCall(VecResetArray(bmat->right));
447       jrow++;
448     }
449     PetscCall(VecResetArray(bmat->left));
450   }
451   PetscCall(VecRestoreArray(x, &xx));
452   PetscCall(VecRestoreArray(y, &yy));
453   PetscFunctionReturn(0);
454 }
455 
456 PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y) {
457   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
458   PetscScalar  *xx, *yy;
459   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
460   Mat          *aa;
461 
462   PetscFunctionBegin;
463   /*
464      Standard CSR multiply except each entry is a Mat
465   */
466   PetscCall(VecGetArray(x, &xx));
467 
468   PetscCall(VecSet(y, 0.0));
469   PetscCall(VecGetArray(y, &yy));
470   aj = bmat->j;
471   aa = bmat->a;
472   ii = bmat->i;
473   for (i = 0; i < m; i++) {
474     jrow = ii[i];
475     n    = ii[i + 1] - jrow;
476     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
477     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
478     /* if we ALWAYS required a diagonal entry then could remove this if test */
479     if (aj[jrow] == i) {
480       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
481       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
482       PetscCall(VecResetArray(bmat->right));
483       jrow++;
484       n--;
485     }
486     for (j = 0; j < n; j++) {
487       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
488       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
489       PetscCall(VecResetArray(bmat->right));
490 
491       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
492       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
493       PetscCall(VecResetArray(bmat->right));
494       jrow++;
495     }
496     PetscCall(VecResetArray(bmat->left));
497     PetscCall(VecResetArray(bmat->middle));
498   }
499   PetscCall(VecRestoreArray(x, &xx));
500   PetscCall(VecRestoreArray(y, &yy));
501   PetscFunctionReturn(0);
502 }
503 
504 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) {
505   PetscFunctionBegin;
506   PetscFunctionReturn(0);
507 }
508 
509 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y) {
510   PetscFunctionBegin;
511   PetscFunctionReturn(0);
512 }
513 
514 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z) {
515   PetscFunctionBegin;
516   PetscFunctionReturn(0);
517 }
518 
519 /*
520      Adds diagonal pointers to sparse matrix structure.
521 */
522 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A) {
523   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
524   PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;
525 
526   PetscFunctionBegin;
527   if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
528   for (i = 0; i < mbs; i++) {
529     a->diag[i] = a->i[i + 1];
530     for (j = a->i[i]; j < a->i[i + 1]; j++) {
531       if (a->j[j] == i) {
532         a->diag[i] = j;
533         break;
534       }
535     }
536   }
537   PetscFunctionReturn(0);
538 }
539 
540 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
541   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
542   Mat_SeqAIJ   *c;
543   PetscInt      i, k, first, step, lensi, nrows, ncols;
544   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
545   PetscScalar  *a_new;
546   Mat           C, *aa = a->a;
547   PetscBool     stride, equal;
548 
549   PetscFunctionBegin;
550   PetscCall(ISEqual(isrow, iscol, &equal));
551   PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
552   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
553   PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
554   PetscCall(ISStrideGetInfo(iscol, &first, &step));
555   PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");
556 
557   PetscCall(ISGetLocalSize(isrow, &nrows));
558   ncols = nrows;
559 
560   /* create submatrix */
561   if (scall == MAT_REUSE_MATRIX) {
562     PetscInt n_cols, n_rows;
563     C = *B;
564     PetscCall(MatGetSize(C, &n_rows, &n_cols));
565     PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
566     PetscCall(MatZeroEntries(C));
567   } else {
568     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
569     PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
570     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
571     else PetscCall(MatSetType(C, MATSEQAIJ));
572     PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
573     PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
574   }
575   c = (Mat_SeqAIJ *)C->data;
576 
577   /* loop over rows inserting into submatrix */
578   a_new = c->a;
579   j_new = c->j;
580   i_new = c->i;
581 
582   for (i = 0; i < nrows; i++) {
583     lensi = ailen[i];
584     for (k = 0; k < lensi; k++) {
585       *j_new++ = *aj++;
586       PetscCall(MatGetValue(*aa++, first, first, a_new++));
587     }
588     i_new[i + 1] = i_new[i] + lensi;
589     c->ilen[i]   = lensi;
590   }
591 
592   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
593   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
594   *B = C;
595   PetscFunctionReturn(0);
596 }
597 
598 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode) {
599   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
600   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
601   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
602   Mat          *aa = a->a, *ap;
603 
604   PetscFunctionBegin;
605   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
606 
607   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
608   for (i = 1; i < m; i++) {
609     /* move each row back by the amount of empty slots (fshift) before it*/
610     fshift += imax[i - 1] - ailen[i - 1];
611     rmax = PetscMax(rmax, ailen[i]);
612     if (fshift) {
613       ip = aj + ai[i];
614       ap = aa + ai[i];
615       N  = ailen[i];
616       for (j = 0; j < N; j++) {
617         ip[j - fshift] = ip[j];
618         ap[j - fshift] = ap[j];
619       }
620     }
621     ai[i] = ai[i - 1] + ailen[i - 1];
622   }
623   if (m) {
624     fshift += imax[m - 1] - ailen[m - 1];
625     ai[m] = ai[m - 1] + ailen[m - 1];
626   }
627   /* reset ilen and imax for each row */
628   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
629   a->nz = ai[m];
630   for (i = 0; i < a->nz; i++) {
631     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
632     PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
633     PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
634   }
635   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
636   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
637   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));
638 
639   A->info.mallocs += a->reallocs;
640   a->reallocs         = 0;
641   A->info.nz_unneeded = (double)fshift;
642   a->rmax             = rmax;
643   PetscCall(MatMarkDiagonal_BlockMat(A));
644   PetscFunctionReturn(0);
645 }
646 
647 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg) {
648   PetscFunctionBegin;
649   if (opt == MAT_SYMMETRIC && flg) {
650     A->ops->sor  = MatSOR_BlockMat_Symmetric;
651     A->ops->mult = MatMult_BlockMat_Symmetric;
652   } else {
653     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
659                                        NULL,
660                                        NULL,
661                                        MatMult_BlockMat,
662                                        /*  4*/ MatMultAdd_BlockMat,
663                                        MatMultTranspose_BlockMat,
664                                        MatMultTransposeAdd_BlockMat,
665                                        NULL,
666                                        NULL,
667                                        NULL,
668                                        /* 10*/ NULL,
669                                        NULL,
670                                        NULL,
671                                        MatSOR_BlockMat,
672                                        NULL,
673                                        /* 15*/ NULL,
674                                        NULL,
675                                        NULL,
676                                        NULL,
677                                        NULL,
678                                        /* 20*/ NULL,
679                                        MatAssemblyEnd_BlockMat,
680                                        MatSetOption_BlockMat,
681                                        NULL,
682                                        /* 24*/ NULL,
683                                        NULL,
684                                        NULL,
685                                        NULL,
686                                        NULL,
687                                        /* 29*/ NULL,
688                                        NULL,
689                                        NULL,
690                                        NULL,
691                                        NULL,
692                                        /* 34*/ NULL,
693                                        NULL,
694                                        NULL,
695                                        NULL,
696                                        NULL,
697                                        /* 39*/ NULL,
698                                        NULL,
699                                        NULL,
700                                        NULL,
701                                        NULL,
702                                        /* 44*/ NULL,
703                                        NULL,
704                                        MatShift_Basic,
705                                        NULL,
706                                        NULL,
707                                        /* 49*/ NULL,
708                                        NULL,
709                                        NULL,
710                                        NULL,
711                                        NULL,
712                                        /* 54*/ NULL,
713                                        NULL,
714                                        NULL,
715                                        NULL,
716                                        NULL,
717                                        /* 59*/ MatCreateSubMatrix_BlockMat,
718                                        MatDestroy_BlockMat,
719                                        MatView_BlockMat,
720                                        NULL,
721                                        NULL,
722                                        /* 64*/ NULL,
723                                        NULL,
724                                        NULL,
725                                        NULL,
726                                        NULL,
727                                        /* 69*/ NULL,
728                                        NULL,
729                                        NULL,
730                                        NULL,
731                                        NULL,
732                                        /* 74*/ NULL,
733                                        NULL,
734                                        NULL,
735                                        NULL,
736                                        NULL,
737                                        /* 79*/ NULL,
738                                        NULL,
739                                        NULL,
740                                        NULL,
741                                        MatLoad_BlockMat,
742                                        /* 84*/ NULL,
743                                        NULL,
744                                        NULL,
745                                        NULL,
746                                        NULL,
747                                        /* 89*/ NULL,
748                                        NULL,
749                                        NULL,
750                                        NULL,
751                                        NULL,
752                                        /* 94*/ NULL,
753                                        NULL,
754                                        NULL,
755                                        NULL,
756                                        NULL,
757                                        /* 99*/ NULL,
758                                        NULL,
759                                        NULL,
760                                        NULL,
761                                        NULL,
762                                        /*104*/ NULL,
763                                        NULL,
764                                        NULL,
765                                        NULL,
766                                        NULL,
767                                        /*109*/ NULL,
768                                        NULL,
769                                        NULL,
770                                        NULL,
771                                        NULL,
772                                        /*114*/ NULL,
773                                        NULL,
774                                        NULL,
775                                        NULL,
776                                        NULL,
777                                        /*119*/ NULL,
778                                        NULL,
779                                        NULL,
780                                        NULL,
781                                        NULL,
782                                        /*124*/ NULL,
783                                        NULL,
784                                        NULL,
785                                        NULL,
786                                        NULL,
787                                        /*129*/ NULL,
788                                        NULL,
789                                        NULL,
790                                        NULL,
791                                        NULL,
792                                        /*134*/ NULL,
793                                        NULL,
794                                        NULL,
795                                        NULL,
796                                        NULL,
797                                        /*139*/ NULL,
798                                        NULL,
799                                        NULL,
800                                        NULL,
801                                        NULL,
802                                        /*144*/ NULL,
803                                        NULL,
804                                        NULL,
805                                        NULL,
806                                        NULL,
807                                        NULL,
808                                        /*150*/ NULL};
809 
810 /*@C
811    MatBlockMatSetPreallocation - For good matrix assembly performance
812    the user should preallocate the matrix storage by setting the parameter nz
813    (or the array nnz).  By setting these parameters accurately, performance
814    during matrix assembly can be increased by more than a factor of 50.
815 
816    Collective
817 
818    Input Parameters:
819 +  B - The matrix
820 .  bs - size of each block in matrix
821 .  nz - number of nonzeros per block row (same for all rows)
822 -  nnz - array containing the number of nonzeros in the various block rows
823          (possibly different for each row) or NULL
824 
825    Notes:
826      If nnz is given then nz is ignored
827 
828    Specify the preallocated storage with either nz or nnz (not both).
829    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
830    allocation.  For large problems you MUST preallocate memory or you
831    will get TERRIBLE performance, see the users' manual chapter on matrices.
832 
833    Level: intermediate
834 
835 .seealso: `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
836 @*/
837 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) {
838   PetscFunctionBegin;
839   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
840   PetscFunctionReturn(0);
841 }
842 
843 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz) {
844   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
845   PetscInt      i;
846 
847   PetscFunctionBegin;
848   PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
849   PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
850   PetscCall(PetscLayoutSetUp(A->rmap));
851   PetscCall(PetscLayoutSetUp(A->cmap));
852   PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));
853 
854   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
855   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
856   if (nnz) {
857     for (i = 0; i < A->rmap->n / bs; i++) {
858       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
859       PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
860     }
861   }
862   bmat->mbs = A->rmap->n / bs;
863 
864   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
865   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
866   PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));
867 
868   if (!bmat->imax) {
869     PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
870     PetscCall(PetscLogObjectMemory((PetscObject)A, 2 * A->rmap->n * sizeof(PetscInt)));
871   }
872   if (PetscLikely(nnz)) {
873     nz = 0;
874     for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
875       bmat->imax[i] = nnz[i];
876       nz += nnz[i];
877     }
878   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
879 
880   /* bmat->ilen will count nonzeros in each row so far. */
881   PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));
882 
883   /* allocate the matrix space */
884   PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
885   PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
886   PetscCall(PetscLogObjectMemory((PetscObject)A, (A->rmap->n + 1) * sizeof(PetscInt) + nz * (sizeof(PetscScalar) + sizeof(PetscInt))));
887   bmat->i[0] = 0;
888   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
889   bmat->singlemalloc = PETSC_TRUE;
890   bmat->free_a       = PETSC_TRUE;
891   bmat->free_ij      = PETSC_TRUE;
892 
893   bmat->nz            = 0;
894   bmat->maxnz         = nz;
895   A->info.nz_unneeded = (double)bmat->maxnz;
896   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
897   PetscFunctionReturn(0);
898 }
899 
900 /*MC
901    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
902                  consisting of (usually) sparse blocks.
903 
904   Level: advanced
905 
906 .seealso: `MatCreateBlockMat()`
907 M*/
908 
909 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A) {
910   Mat_BlockMat *b;
911 
912   PetscFunctionBegin;
913   PetscCall(PetscNewLog(A, &b));
914   A->data = (void *)b;
915   PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
916 
917   A->assembled    = PETSC_TRUE;
918   A->preallocated = PETSC_FALSE;
919   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
920 
921   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
922   PetscFunctionReturn(0);
923 }
924 
925 /*@C
926    MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object
927 
928   Collective
929 
930    Input Parameters:
931 +  comm - MPI communicator
932 .  m - number of rows
933 .  n  - number of columns
934 .  bs - size of each submatrix
935 .  nz  - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
936 -  nnz - expected number of nonzers per block row if known (use NULL otherwise)
937 
938    Output Parameter:
939 .  A - the matrix
940 
941    Level: intermediate
942 
943    Notes:
944     Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
945    have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
946 
947    For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
948 
949    Developer Note:
950    I don't like the name, it is not `MATNESTMAT`
951 
952 .seealso: `MATBLOCKMAT`, `MatCreateNest()`
953 @*/
954 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A) {
955   PetscFunctionBegin;
956   PetscCall(MatCreate(comm, A));
957   PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
958   PetscCall(MatSetType(*A, MATBLOCKMAT));
959   PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
960   PetscFunctionReturn(0);
961 }
962