xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 57d508425293f0bb93f59574d14951d8faac9af8)
1 /*
2    This provides a matrix that consists of Mats
3 */
4 
5 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */
7 
typedef struct {
  SEQAIJHEADER(Mat); /* CSR-style indexing (i, j, ilen, imax, nz, ...) where each stored "value" is itself a Mat */
  SEQBAIJHEADER;     /* block bookkeeping (mbs, ...) shared with the SeqBAIJ format */
  Mat *diags;        /* factored diagonal blocks; allocated lazily by the SOR routines */

  Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
15 
/*
   SSOR-type relaxation for MATBLOCKMAT when only the upper triangular part of a
   symmetric matrix is stored; each stored entry a->a[k] is a (bs x bs) Mat.
   Only omega == 1.0, fshift == 0.0, and non-Eisenstat sweeps are supported, and a
   backward sweep is only allowed in combination with a forward sweep.
*/
static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
  PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

  /* Lazily Cholesky-factor each diagonal block on the first call; factors are reused on
     subsequent calls.  NOTE(review): a->diags[i] comes straight from PetscMalloc1() and is
     not obtained via MatGetFactor() before MatCholeskyFactorSymbolic() -- confirm this path
     actually works; also the a->diags array itself must be freed in MatDestroy_BlockMat(). */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb, &a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  /* copy right-hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb, a->workb));
  PetscCall(VecGetArrayRead(a->workb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        /* skip the first stored entry of block row i: with upper-triangular storage it is
           assumed to be the diagonal block, handled separately via diag[i] below */
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        /* left = sum_{j>i} U_ij * x_j  (accumulate off-diagonal contributions) */
        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        /* left = b_i - left */
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        /* x_i = D_ii^{-1} (b_i - sum U_ij x_j) using the Cholesky factor of the diagonal block */
        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));

        /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
        /* b_j -= U_ij^T x_i applies the (implicit) lower-triangular part to the copied RHS.
           NOTE(review): this writes through b even though it came from VecGetArrayRead();
           b aliases the private work vector workb, but the const discipline is violated. */
        for (j = 0; j < n; j++) {
          PetscCall(MatMultTranspose(v[j], right, left));
          PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
          PetscCall(VecAXPY(middle, -1.0, left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      /* backward sweep: same update, traversing block rows in reverse; the RHS was already
         adjusted during the forward sweep so no transpose correction is needed here */
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(a->workb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
114 
/*
   SOR relaxation for MATBLOCKMAT with the full (non-symmetric) block storage;
   each stored entry a->a[k] is a (bs x bs) Mat.  Only omega == 1.0,
   fshift == 0.0 and non-Eisenstat sweeps are supported.
*/
static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

  /* Lazily LU-factor each diagonal block on the first call; factors are reused later.
     NOTE(review): a->diags[i] comes straight from PetscMalloc1() and is not obtained via
     MatGetFactor() before MatLUFactorSymbolic() -- confirm this path actually works. */
  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
      PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        /* left = sum_{j != i} A_ij * x_j, skipping the diagonal block */
        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        /* left = b_i - left */
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        /* x_i = A_ii^{-1} (b_i - sum A_ij x_j) using the LU factor of the diagonal block */
        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      /* identical update, traversing block rows in reverse order */
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
205 
/*
   Inserts scalar values into the block matrix: each (row, col) pair is mapped to
   block (row/bs, col/bs) and the value is forwarded via MatSetValues() to the
   (bs x bs) submatrix stored for that block.  New nonzero *blocks* cannot be
   introduced here -- the block nonzero pattern is fixed by preallocation; the
   submatrix for an existing block slot is created lazily on first touch.
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt     *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1;
  PetscInt     *ai = a->i, *ailen = a->ilen;
  PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
  PetscInt      ridx, cidx;
  PetscBool     roworiented = a->roworiented;
  MatScalar     value;
  Mat          *ap, *aa = a->a;

  PetscFunctionBegin;
  for (k = 0; k < m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row / bs;
    if (row < 0) continue; /* negative row indices are ignored, per PETSc convention */
    PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    rp   = aj + ai[brow]; /* column indices of the existing blocks in block row brow */
    ap   = aa + ai[brow]; /* corresponding submatrices */
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l = 0; l < n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
      col  = in[l];
      bcol = col / bs;
      /* symmetric storage keeps only the upper triangular blocks */
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs; /* position within the (bs x bs) submatrix */
      cidx = col % bs;
      if (roworiented) value = v[l + k * n];
      else value = v[k + l * m];

      /* reuse the previous search window when columns arrive in increasing order */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      /* binary search down to a window of <= 7 entries, then linear scan */
      while (high - low > 7) {
        t = (low + high) / 2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }
      for (i = low; i < high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1; /* found the existing block slot */
      }
      if (nonew == 1) goto noinsert1;
      /* inserting a brand-new nonzero block is not supported; the (disabled) code
         below shows how the slot shifting would be done if it ever were */
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col);
#if 0
      MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
      N = nrow++ - 1;
      high++;
      /* shift up all the later entries in this row */
      for (ii = N; ii >= i; ii--) {
        rp[ii + 1] = rp[ii];
        ap[ii + 1] = ap[ii];
      }
      if (N >= i) ap[i] = NULL;
      rp[i] = bcol;
      a->nz++;
#endif
    noinsert1:;
      /* create the (bs x bs) submatrix for this block slot on first touch */
      if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
      PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
      low = i; /* subsequent columns in this call are >= col, so restart the search here */
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
275 
276 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
277 {
278   Mat                tmpA;
279   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
280   const PetscInt    *cols;
281   const PetscScalar *values;
282   PetscBool          flg = PETSC_FALSE, notdone;
283   Mat_SeqAIJ        *a;
284   Mat_BlockMat      *amat;
285 
286   PetscFunctionBegin;
287   /* force binary viewer to load .info file if it has not yet done so */
288   PetscCall(PetscViewerSetUp(viewer));
289   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
290   PetscCall(MatSetType(tmpA, MATSEQAIJ));
291   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
292 
293   PetscCall(MatGetLocalSize(tmpA, &m, &n));
294   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
295   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
296   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
297   PetscOptionsEnd();
298 
299   /* Determine number of nonzero blocks for each block row */
300   a   = (Mat_SeqAIJ *)tmpA->data;
301   mbs = m / bs;
302   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
303   PetscCall(PetscArrayzero(lens, mbs));
304 
305   for (i = 0; i < mbs; i++) {
306     for (j = 0; j < bs; j++) {
307       ii[j]    = a->j + a->i[i * bs + j];
308       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
309     }
310 
311     currentcol = -1;
312     while (PETSC_TRUE) {
313       notdone = PETSC_FALSE;
314       nextcol = 1000000000;
315       for (j = 0; j < bs; j++) {
316         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
317           ii[j]++;
318           ilens[j]--;
319         }
320         if (ilens[j] > 0) {
321           notdone = PETSC_TRUE;
322           nextcol = PetscMin(nextcol, ii[j][0] / bs);
323         }
324       }
325       if (!notdone) break;
326       if (!flg || (nextcol >= i)) lens[i]++;
327       currentcol = nextcol;
328     }
329   }
330 
331   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
332   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
333   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
334   amat = (Mat_BlockMat *)newmat->data;
335 
336   /* preallocate the submatrices */
337   PetscCall(PetscMalloc1(bs, &llens));
338   for (i = 0; i < mbs; i++) { /* loops for block rows */
339     for (j = 0; j < bs; j++) {
340       ii[j]    = a->j + a->i[i * bs + j];
341       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
342     }
343 
344     currentcol = 1000000000;
345     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
346       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
347     }
348 
349     while (PETSC_TRUE) { /* loops over blocks in block row */
350       notdone = PETSC_FALSE;
351       nextcol = 1000000000;
352       PetscCall(PetscArrayzero(llens, bs));
353       for (j = 0; j < bs; j++) {                              /* loop over rows in block */
354         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
355           ii[j]++;
356           ilens[j]--;
357           llens[j]++;
358         }
359         if (ilens[j] > 0) {
360           notdone = PETSC_TRUE;
361           nextcol = PetscMin(nextcol, ii[j][0] / bs);
362         }
363       }
364       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
365       if (!flg || currentcol >= i) {
366         amat->j[cnt] = currentcol;
367         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
368       }
369 
370       if (!notdone) break;
371       currentcol = nextcol;
372     }
373     amat->ilen[i] = lens[i];
374   }
375 
376   PetscCall(PetscFree3(lens, ii, ilens));
377   PetscCall(PetscFree(llens));
378 
379   /* copy over the matrix, one row at a time */
380   for (i = 0; i < m; i++) {
381     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
382     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
383     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
384   }
385   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
386   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
387   PetscFunctionReturn(PETSC_SUCCESS);
388 }
389 
390 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
391 {
392   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
393   const char       *name;
394   PetscViewerFormat format;
395 
396   PetscFunctionBegin;
397   PetscCall(PetscObjectGetName((PetscObject)A, &name));
398   PetscCall(PetscViewerGetFormat(viewer, &format));
399   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
400     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
401     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
402   }
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
407 {
408   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
409   PetscInt      i;
410 
411   PetscFunctionBegin;
412   PetscCall(VecDestroy(&bmat->right));
413   PetscCall(VecDestroy(&bmat->left));
414   PetscCall(VecDestroy(&bmat->middle));
415   PetscCall(VecDestroy(&bmat->workb));
416   if (bmat->diags) {
417     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
418   }
419   if (bmat->a) {
420     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
421   }
422   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
423   PetscCall(PetscFree(mat->data));
424   PetscFunctionReturn(PETSC_SUCCESS);
425 }
426 
427 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
428 {
429   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
430   PetscScalar  *xx, *yy;
431   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
432   Mat          *aa;
433 
434   PetscFunctionBegin;
435   /*
436      Standard CSR multiply except each entry is a Mat
437   */
438   PetscCall(VecGetArray(x, &xx));
439 
440   PetscCall(VecSet(y, 0.0));
441   PetscCall(VecGetArray(y, &yy));
442   aj = bmat->j;
443   aa = bmat->a;
444   ii = bmat->i;
445   for (i = 0; i < m; i++) {
446     jrow = ii[i];
447     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
448     n = ii[i + 1] - jrow;
449     for (j = 0; j < n; j++) {
450       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
451       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
452       PetscCall(VecResetArray(bmat->right));
453       jrow++;
454     }
455     PetscCall(VecResetArray(bmat->left));
456   }
457   PetscCall(VecRestoreArray(x, &xx));
458   PetscCall(VecRestoreArray(y, &yy));
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
463 {
464   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
465   PetscScalar  *xx, *yy;
466   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
467   Mat          *aa;
468 
469   PetscFunctionBegin;
470   /*
471      Standard CSR multiply except each entry is a Mat
472   */
473   PetscCall(VecGetArray(x, &xx));
474 
475   PetscCall(VecSet(y, 0.0));
476   PetscCall(VecGetArray(y, &yy));
477   aj = bmat->j;
478   aa = bmat->a;
479   ii = bmat->i;
480   for (i = 0; i < m; i++) {
481     jrow = ii[i];
482     n    = ii[i + 1] - jrow;
483     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
484     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
485     /* if we ALWAYS required a diagonal entry then could remove this if test */
486     if (aj[jrow] == i) {
487       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
488       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
489       PetscCall(VecResetArray(bmat->right));
490       jrow++;
491       n--;
492     }
493     for (j = 0; j < n; j++) {
494       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
495       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
496       PetscCall(VecResetArray(bmat->right));
497 
498       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
499       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
500       PetscCall(VecResetArray(bmat->right));
501       jrow++;
502     }
503     PetscCall(VecResetArray(bmat->left));
504     PetscCall(VecResetArray(bmat->middle));
505   }
506   PetscCall(VecRestoreArray(x, &xx));
507   PetscCall(VecRestoreArray(y, &yy));
508   PetscFunctionReturn(PETSC_SUCCESS);
509 }
510 
511 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
512 {
513   PetscFunctionBegin;
514   PetscFunctionReturn(PETSC_SUCCESS);
515 }
516 
517 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
518 {
519   PetscFunctionBegin;
520   PetscFunctionReturn(PETSC_SUCCESS);
521 }
522 
523 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
524 {
525   PetscFunctionBegin;
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 /*
530      Adds diagonal pointers to sparse matrix nonzero structure.
531 */
532 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
533 {
534   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
535   PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;
536 
537   PetscFunctionBegin;
538   if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
539   for (i = 0; i < mbs; i++) {
540     a->diag[i] = a->i[i + 1];
541     for (j = a->i[i]; j < a->i[i + 1]; j++) {
542       if (a->j[j] == i) {
543         a->diag[i] = j;
544         break;
545       }
546     }
547   }
548   PetscFunctionReturn(PETSC_SUCCESS);
549 }
550 
/*
   Extracts a scalar submatrix by selecting one entry (position `first`) from each
   (bs x bs) block: isrow/iscol must be identical stride index sets with step equal
   to the block size, so the result is an mbs x mbs SeqAIJ (or SeqSBAIJ when the
   block matrix is symmetric) with entry (i,j) = (block (i,j))[first][first].
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  Mat_SeqAIJ   *c;
  PetscInt      i, k, first, step, lensi, nrows, ncols;
  PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
  PetscScalar  *a_new;
  Mat           C, *aa = a->a;
  PetscBool     stride, equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow, iscol, &equal));
  PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
  PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol, &first, &step));
  PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow, &nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    C = *B;
    PetscCall(MatGetSize(C, &n_rows, &n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
    else PetscCall(MatSetType(C, MATSEQAIJ));
    /* both preallocations are called; only the one matching C's type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
  }
  /* NOTE(review): C may be a SeqSBAIJ here; this relies on the SeqSBAIJ data layout
     matching Mat_SeqAIJ for the fields used below (a, j, i, ilen) -- confirm */
  c = (Mat_SeqAIJ *)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  /* walks aj/aa sequentially from the start, so the block matrix must be fully
     assembled (compressed storage, ailen[i] == row lengths) */
  for (i = 0; i < nrows; i++) {
    lensi = ailen[i];
    for (k = 0; k < lensi; k++) {
      *j_new++ = *aj++;
      /* pull the (first, first) scalar entry out of the (bs x bs) block */
      PetscCall(MatGetValue(*aa++, first, first, a_new++));
    }
    i_new[i + 1] = i_new[i] + lensi;
    c->ilen[i]   = lensi;
  }

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
609 
/*
   Finalizes assembly: compresses the CSR storage in place (removing the unused
   slots left by preallocation), records the diagonal block locations, and
   assembles every stored (bs x bs) submatrix.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
{
  Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
  PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
  PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
  Mat          *aa = a->a, *ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i = 1; i < m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i - 1] - ailen[i - 1];
    rmax = PetscMax(rmax, ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      /* shift left is safe even though source/destination overlap: j - fshift < j */
      for (j = 0; j < N; j++) {
        ip[j - fshift] = ip[j];
        ap[j - fshift] = ap[j];
      }
    }
    ai[i] = ai[i - 1] + ailen[i - 1];
  }
  if (m) {
    fshift += imax[m - 1] - ailen[m - 1];
    ai[m] = ai[m - 1] + ailen[m - 1];
  }
  /* reset ilen and imax for each row */
  for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
  a->nz = ai[m];
  /* every remaining slot must hold a created submatrix; assemble each one */
  for (i = 0; i < a->nz; i++) {
    PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
    PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
  PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
  PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

  A->info.mallocs += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  /* locate the diagonal blocks now that the column indices are final */
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
659 
660 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
661 {
662   PetscFunctionBegin;
663   if (opt == MAT_SYMMETRIC && flg) {
664     A->ops->sor  = MatSOR_BlockMat_Symmetric;
665     A->ops->mult = MatMult_BlockMat_Symmetric;
666   } else {
667     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
668   }
669   PetscFunctionReturn(PETSC_SUCCESS);
670 }
671 
672 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
673                                        NULL,
674                                        NULL,
675                                        MatMult_BlockMat,
676                                        /*  4*/ MatMultAdd_BlockMat,
677                                        MatMultTranspose_BlockMat,
678                                        MatMultTransposeAdd_BlockMat,
679                                        NULL,
680                                        NULL,
681                                        NULL,
682                                        /* 10*/ NULL,
683                                        NULL,
684                                        NULL,
685                                        MatSOR_BlockMat,
686                                        NULL,
687                                        /* 15*/ NULL,
688                                        NULL,
689                                        NULL,
690                                        NULL,
691                                        NULL,
692                                        /* 20*/ NULL,
693                                        MatAssemblyEnd_BlockMat,
694                                        MatSetOption_BlockMat,
695                                        NULL,
696                                        /* 24*/ NULL,
697                                        NULL,
698                                        NULL,
699                                        NULL,
700                                        NULL,
701                                        /* 29*/ NULL,
702                                        NULL,
703                                        NULL,
704                                        NULL,
705                                        NULL,
706                                        /* 34*/ NULL,
707                                        NULL,
708                                        NULL,
709                                        NULL,
710                                        NULL,
711                                        /* 39*/ NULL,
712                                        NULL,
713                                        NULL,
714                                        NULL,
715                                        NULL,
716                                        /* 44*/ NULL,
717                                        NULL,
718                                        MatShift_Basic,
719                                        NULL,
720                                        NULL,
721                                        /* 49*/ NULL,
722                                        NULL,
723                                        NULL,
724                                        NULL,
725                                        NULL,
726                                        /* 54*/ NULL,
727                                        NULL,
728                                        NULL,
729                                        NULL,
730                                        NULL,
731                                        /* 59*/ MatCreateSubMatrix_BlockMat,
732                                        MatDestroy_BlockMat,
733                                        MatView_BlockMat,
734                                        NULL,
735                                        NULL,
736                                        /* 64*/ NULL,
737                                        NULL,
738                                        NULL,
739                                        NULL,
740                                        NULL,
741                                        /* 69*/ NULL,
742                                        NULL,
743                                        NULL,
744                                        NULL,
745                                        NULL,
746                                        /* 74*/ NULL,
747                                        NULL,
748                                        NULL,
749                                        NULL,
750                                        MatLoad_BlockMat,
751                                        /* 79*/ NULL,
752                                        NULL,
753                                        NULL,
754                                        NULL,
755                                        NULL,
756                                        /* 84*/ NULL,
757                                        NULL,
758                                        NULL,
759                                        NULL,
760                                        NULL,
761                                        /* 89*/ NULL,
762                                        NULL,
763                                        NULL,
764                                        NULL,
765                                        NULL,
766                                        /* 94*/ NULL,
767                                        NULL,
768                                        NULL,
769                                        NULL,
770                                        NULL,
771                                        /* 99*/ NULL,
772                                        NULL,
773                                        NULL,
774                                        NULL,
775                                        NULL,
776                                        /*104*/ NULL,
777                                        NULL,
778                                        NULL,
779                                        NULL,
780                                        NULL,
781                                        /*109*/ NULL,
782                                        NULL,
783                                        NULL,
784                                        NULL,
785                                        NULL,
786                                        /*114*/ NULL,
787                                        NULL,
788                                        NULL,
789                                        NULL,
790                                        NULL,
791                                        /*119*/ NULL,
792                                        NULL,
793                                        NULL,
794                                        NULL,
795                                        NULL,
796                                        /*124*/ NULL,
797                                        NULL,
798                                        NULL,
799                                        NULL,
800                                        NULL,
801                                        /*129*/ NULL,
802                                        NULL,
803                                        NULL,
804                                        NULL,
805                                        NULL,
806                                        /*134*/ NULL,
807                                        NULL,
808                                        NULL,
809                                        NULL,
810                                        NULL,
811                                        /*139*/ NULL,
812                                        NULL,
813                                        NULL,
814                                        NULL,
815                                        NULL,
816                                        NULL};
817 
818 /*@C
819   MatBlockMatSetPreallocation - For good matrix assembly performance
820   the user should preallocate the matrix storage by setting the parameter nz
821   (or the array nnz).  By setting these parameters accurately, performance
822   during matrix assembly can be increased by more than a factor of 50.
823 
824   Collective
825 
826   Input Parameters:
827 + B   - The matrix
828 . bs  - size of each block in matrix
829 . nz  - number of nonzeros per block row (same for all rows)
830 - nnz - array containing the number of nonzeros in the various block rows
831          (possibly different for each row) or `NULL`
832 
833   Level: intermediate
834 
835   Notes:
836   If `nnz` is given then `nz` is ignored
837 
  Specify the preallocated storage with either `nz` or `nnz` (not both).
  Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
  allocation. Note, however, that the current implementation requires `nnz` to be
  provided and generates an error when it is `NULL`.
841 
842 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
843 @*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on B under the name
     "MatBlockMatSetPreallocation_C" (MatBlockMatSetPreallocation_BlockMat for
     MATBLOCKMAT); PetscTryMethod() is a silent no-op for matrix types that do
     not provide that method. */
  PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
850 
/* Type-specific implementation of MatBlockMatSetPreallocation(), reached via the
   "MatBlockMatSetPreallocation_C" composed function. Fixes the block size on the
   row/column layouts and allocates the block-CSR storage (a/j/i arrays) that
   will hold one Mat pointer per nonzero block. */
static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  /* Record the requested block size on both layouts and set them up; bs is then
     re-read from the row layout so the rest of the routine uses the value the
     layout actually settled on */
  PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

  /* Validate nz/nnz; nz is effectively unused below because nnz is required
     (see the PetscCheck after the work-vector creation) */
  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
  if (nnz) {
    for (i = 0; i < A->rmap->n / bs; i++) {
      PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
    }
  }
  bmat->mbs = A->rmap->n / bs; /* number of block rows */

  /* Work vectors of length bs used by the local kernels; right/middle are
     created with a NULL array — presumably the kernels attach storage with
     VecPlaceArray() before use (TODO confirm against the MatMult/MatSOR code) */
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

  /* NOTE(review): imax/ilen are sized with A->rmap->n entries although only the
     first mbs (= n/bs) are used below — looks like an over-allocation; confirm
     before tightening */
  if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
  /* nnz is mandatory for now: exact per-block-row counts are required */
  PetscCheck(PetscLikely(nnz), PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
  /* nz is recomputed as the total number of nonzero blocks */
  nz = 0;
  for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
    bmat->imax[i] = nnz[i];
    nz += nnz[i];
  }

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j));
  PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i));
  bmat->free_a  = PETSC_TRUE;
  bmat->free_ij = PETSC_TRUE;

  /* Build the CSR row-start offsets from the per-block-row capacities */
  bmat->i[0] = 0;
  for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];

  /* No entries inserted yet; maxnz is the preallocated capacity, all of which
     is initially "unneeded" for the assembly statistics */
  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  /* Inserting outside the preallocated pattern is an error from here on */
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
905 
906 /*MC
907    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
908                  consisting of (usually) sparse blocks.
909 
910   Level: advanced
911 
912 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
913 M*/
914 
915 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
916 {
917   Mat_BlockMat *b;
918 
919   PetscFunctionBegin;
920   PetscCall(PetscNew(&b));
921   A->data         = (void *)b;
922   A->ops[0]       = MatOps_Values;
923   A->assembled    = PETSC_TRUE;
924   A->preallocated = PETSC_FALSE;
925   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
926 
927   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
928   PetscFunctionReturn(PETSC_SUCCESS);
929 }
930 
931 /*@C
932   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object
933 
934   Collective
935 
936   Input Parameters:
937 + comm - MPI communicator
938 . m    - number of rows
939 . n    - number of columns
940 . bs   - size of each submatrix
941 . nz   - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
- nnz  - expected number of nonzeros per block row if known (use `NULL` otherwise)
943 
944   Output Parameter:
945 . A - the matrix
946 
947   Level: intermediate
948 
949   Notes:
950   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
951   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
952 
953   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
954 
955   Developer Notes:
956   I don't like the name, it is not `MATNESTMAT`
957 
958 .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
959 @*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  /* Standard PETSc construction sequence: create, set local sizes (global
     sizes derived), set the type, then preallocate. The order matters —
     MatSetType() must precede preallocation so the composed
     MatBlockMatSetPreallocation_C method exists on *A. */
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
969