xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision b2e8165f902b29d33cb5c6fcd6da9ceca759ca7a)
1 /*
2    This provides a matrix that consists of Mats
3 */
4 
5 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */
7 
8 typedef struct {
9   SEQAIJHEADER(Mat);
10   SEQBAIJHEADER;
11   Mat *diags;
12 
13   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
14 } Mat_BlockMat;
15 
16 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
17 {
18   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
19   PetscScalar       *x;
20   const Mat         *v;
21   const PetscScalar *b;
22   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
23   const PetscInt    *idx;
24   IS                 row, col;
25   MatFactorInfo      info;
26   Vec                left = a->left, right = a->right, middle = a->middle;
27   Mat               *diag;
28 
29   PetscFunctionBegin;
30   its = its * lits;
31   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
32   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
33   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
34   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
35   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
36 
37   if (!a->diags) {
38     PetscCall(PetscMalloc1(mbs, &a->diags));
39     PetscCall(MatFactorInfoInitialize(&info));
40     for (i = 0; i < mbs; i++) {
41       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
42       PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
43       PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
44       PetscCall(ISDestroy(&row));
45       PetscCall(ISDestroy(&col));
46     }
47     PetscCall(VecDuplicate(bb, &a->workb));
48   }
49   diag = a->diags;
50 
51   PetscCall(VecSet(xx, 0.0));
52   PetscCall(VecGetArray(xx, &x));
53   /* copy right-hand side because it must be modified during iteration */
54   PetscCall(VecCopy(bb, a->workb));
55   PetscCall(VecGetArrayRead(a->workb, &b));
56 
57   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
58   while (its--) {
59     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
60       for (i = 0; i < mbs; i++) {
61         n   = a->i[i + 1] - a->i[i] - 1;
62         idx = a->j + a->i[i] + 1;
63         v   = a->a + a->i[i] + 1;
64 
65         PetscCall(VecSet(left, 0.0));
66         for (j = 0; j < n; j++) {
67           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
68           PetscCall(MatMultAdd(v[j], right, left, left));
69           PetscCall(VecResetArray(right));
70         }
71         PetscCall(VecPlaceArray(right, b + i * bs));
72         PetscCall(VecAYPX(left, -1.0, right));
73         PetscCall(VecResetArray(right));
74 
75         PetscCall(VecPlaceArray(right, x + i * bs));
76         PetscCall(MatSolve(diag[i], left, right));
77 
78         /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
79         for (j = 0; j < n; j++) {
80           PetscCall(MatMultTranspose(v[j], right, left));
81           PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
82           PetscCall(VecAXPY(middle, -1.0, left));
83           PetscCall(VecResetArray(middle));
84         }
85         PetscCall(VecResetArray(right));
86       }
87     }
88     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
89       for (i = mbs - 1; i >= 0; i--) {
90         n   = a->i[i + 1] - a->i[i] - 1;
91         idx = a->j + a->i[i] + 1;
92         v   = a->a + a->i[i] + 1;
93 
94         PetscCall(VecSet(left, 0.0));
95         for (j = 0; j < n; j++) {
96           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
97           PetscCall(MatMultAdd(v[j], right, left, left));
98           PetscCall(VecResetArray(right));
99         }
100         PetscCall(VecPlaceArray(right, b + i * bs));
101         PetscCall(VecAYPX(left, -1.0, right));
102         PetscCall(VecResetArray(right));
103 
104         PetscCall(VecPlaceArray(right, x + i * bs));
105         PetscCall(MatSolve(diag[i], left, right));
106         PetscCall(VecResetArray(right));
107       }
108     }
109   }
110   PetscCall(VecRestoreArray(xx, &x));
111   PetscCall(VecRestoreArrayRead(a->workb, &b));
112   PetscFunctionReturn(PETSC_SUCCESS);
113 }
114 
115 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
116 {
117   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
118   PetscScalar       *x;
119   const Mat         *v;
120   const PetscScalar *b;
121   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
122   const PetscInt    *idx;
123   IS                 row, col;
124   MatFactorInfo      info;
125   Vec                left = a->left, right = a->right;
126   Mat               *diag;
127 
128   PetscFunctionBegin;
129   its = its * lits;
130   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
131   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
132   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
133   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
134 
135   if (!a->diags) {
136     PetscCall(PetscMalloc1(mbs, &a->diags));
137     PetscCall(MatFactorInfoInitialize(&info));
138     for (i = 0; i < mbs; i++) {
139       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
140       PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
141       PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
142       PetscCall(ISDestroy(&row));
143       PetscCall(ISDestroy(&col));
144     }
145   }
146   diag = a->diags;
147 
148   PetscCall(VecSet(xx, 0.0));
149   PetscCall(VecGetArray(xx, &x));
150   PetscCall(VecGetArrayRead(bb, &b));
151 
152   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
153   while (its--) {
154     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
155       for (i = 0; i < mbs; i++) {
156         n   = a->i[i + 1] - a->i[i];
157         idx = a->j + a->i[i];
158         v   = a->a + a->i[i];
159 
160         PetscCall(VecSet(left, 0.0));
161         for (j = 0; j < n; j++) {
162           if (idx[j] != i) {
163             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
164             PetscCall(MatMultAdd(v[j], right, left, left));
165             PetscCall(VecResetArray(right));
166           }
167         }
168         PetscCall(VecPlaceArray(right, b + i * bs));
169         PetscCall(VecAYPX(left, -1.0, right));
170         PetscCall(VecResetArray(right));
171 
172         PetscCall(VecPlaceArray(right, x + i * bs));
173         PetscCall(MatSolve(diag[i], left, right));
174         PetscCall(VecResetArray(right));
175       }
176     }
177     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178       for (i = mbs - 1; i >= 0; i--) {
179         n   = a->i[i + 1] - a->i[i];
180         idx = a->j + a->i[i];
181         v   = a->a + a->i[i];
182 
183         PetscCall(VecSet(left, 0.0));
184         for (j = 0; j < n; j++) {
185           if (idx[j] != i) {
186             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
187             PetscCall(MatMultAdd(v[j], right, left, left));
188             PetscCall(VecResetArray(right));
189           }
190         }
191         PetscCall(VecPlaceArray(right, b + i * bs));
192         PetscCall(VecAYPX(left, -1.0, right));
193         PetscCall(VecResetArray(right));
194 
195         PetscCall(VecPlaceArray(right, x + i * bs));
196         PetscCall(MatSolve(diag[i], left, right));
197         PetscCall(VecResetArray(right));
198       }
199     }
200   }
201   PetscCall(VecRestoreArray(xx, &x));
202   PetscCall(VecRestoreArrayRead(bb, &b));
203   PetscFunctionReturn(PETSC_SUCCESS);
204 }
205 
206 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
207 {
208   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
209   PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
210   PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
211   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
212   PetscInt      ridx, cidx;
213   PetscBool     roworiented = a->roworiented;
214   MatScalar     value;
215   Mat          *ap, *aa = a->a;
216 
217   PetscFunctionBegin;
218   for (k = 0; k < m; k++) { /* loop over added rows */
219     row  = im[k];
220     brow = row / bs;
221     if (row < 0) continue;
222     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
223     rp   = aj + ai[brow];
224     ap   = aa + ai[brow];
225     rmax = imax[brow];
226     nrow = ailen[brow];
227     low  = 0;
228     high = nrow;
229     for (l = 0; l < n; l++) { /* loop over added columns */
230       if (in[l] < 0) continue;
231       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
232       col  = in[l];
233       bcol = col / bs;
234       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
235       ridx = row % bs;
236       cidx = col % bs;
237       if (roworiented) value = v[l + k * n];
238       else value = v[k + l * m];
239 
240       if (col <= lastcol) low = 0;
241       else high = nrow;
242       lastcol = col;
243       while (high - low > 7) {
244         t = (low + high) / 2;
245         if (rp[t] > bcol) high = t;
246         else low = t;
247       }
248       for (i = low; i < high; i++) {
249         if (rp[i] > bcol) break;
250         if (rp[i] == bcol) goto noinsert1;
251       }
252       if (nonew == 1) goto noinsert1;
253       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
254       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
255       N = nrow++ - 1;
256       high++;
257       /* shift up all the later entries in this row */
258       for (ii = N; ii >= i; ii--) {
259         rp[ii + 1] = rp[ii];
260         ap[ii + 1] = ap[ii];
261       }
262       if (N >= i) ap[i] = NULL;
263       rp[i] = bcol;
264       a->nz++;
265     noinsert1:;
266       if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
267       PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
268       low = i;
269     }
270     ailen[brow] = nrow;
271   }
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
276 {
277   Mat                tmpA;
278   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
279   const PetscInt    *cols;
280   const PetscScalar *values;
281   PetscBool          flg = PETSC_FALSE, notdone;
282   Mat_SeqAIJ        *a;
283   Mat_BlockMat      *amat;
284 
285   PetscFunctionBegin;
286   /* force binary viewer to load .info file if it has not yet done so */
287   PetscCall(PetscViewerSetUp(viewer));
288   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
289   PetscCall(MatSetType(tmpA, MATSEQAIJ));
290   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
291 
292   PetscCall(MatGetLocalSize(tmpA, &m, &n));
293   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
294   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
295   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
296   PetscOptionsEnd();
297 
298   /* Determine number of nonzero blocks for each block row */
299   a   = (Mat_SeqAIJ *)tmpA->data;
300   mbs = m / bs;
301   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
302   PetscCall(PetscArrayzero(lens, mbs));
303 
304   for (i = 0; i < mbs; i++) {
305     for (j = 0; j < bs; j++) {
306       ii[j]    = a->j + a->i[i * bs + j];
307       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
308     }
309 
310     currentcol = -1;
311     while (PETSC_TRUE) {
312       notdone = PETSC_FALSE;
313       nextcol = 1000000000;
314       for (j = 0; j < bs; j++) {
315         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
316           ii[j]++;
317           ilens[j]--;
318         }
319         if (ilens[j] > 0) {
320           notdone = PETSC_TRUE;
321           nextcol = PetscMin(nextcol, ii[j][0] / bs);
322         }
323       }
324       if (!notdone) break;
325       if (!flg || (nextcol >= i)) lens[i]++;
326       currentcol = nextcol;
327     }
328   }
329 
330   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
331   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
332   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
333   amat = (Mat_BlockMat *)(newmat)->data;
334 
335   /* preallocate the submatrices */
336   PetscCall(PetscMalloc1(bs, &llens));
337   for (i = 0; i < mbs; i++) { /* loops for block rows */
338     for (j = 0; j < bs; j++) {
339       ii[j]    = a->j + a->i[i * bs + j];
340       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
341     }
342 
343     currentcol = 1000000000;
344     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
345       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
346     }
347 
348     while (PETSC_TRUE) { /* loops over blocks in block row */
349       notdone = PETSC_FALSE;
350       nextcol = 1000000000;
351       PetscCall(PetscArrayzero(llens, bs));
352       for (j = 0; j < bs; j++) {                              /* loop over rows in block */
353         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
354           ii[j]++;
355           ilens[j]--;
356           llens[j]++;
357         }
358         if (ilens[j] > 0) {
359           notdone = PETSC_TRUE;
360           nextcol = PetscMin(nextcol, ii[j][0] / bs);
361         }
362       }
363       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
364       if (!flg || currentcol >= i) {
365         amat->j[cnt] = currentcol;
366         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
367       }
368 
369       if (!notdone) break;
370       currentcol = nextcol;
371     }
372     amat->ilen[i] = lens[i];
373   }
374 
375   PetscCall(PetscFree3(lens, ii, ilens));
376   PetscCall(PetscFree(llens));
377 
378   /* copy over the matrix, one row at a time */
379   for (i = 0; i < m; i++) {
380     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
381     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
382     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
383   }
384   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
385   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
386   PetscFunctionReturn(PETSC_SUCCESS);
387 }
388 
389 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
390 {
391   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
392   const char       *name;
393   PetscViewerFormat format;
394 
395   PetscFunctionBegin;
396   PetscCall(PetscObjectGetName((PetscObject)A, &name));
397   PetscCall(PetscViewerGetFormat(viewer, &format));
398   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
399     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
400     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
401   }
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
405 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
406 {
407   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
408   PetscInt      i;
409 
410   PetscFunctionBegin;
411   PetscCall(VecDestroy(&bmat->right));
412   PetscCall(VecDestroy(&bmat->left));
413   PetscCall(VecDestroy(&bmat->middle));
414   PetscCall(VecDestroy(&bmat->workb));
415   if (bmat->diags) {
416     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
417   }
418   if (bmat->a) {
419     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
420   }
421   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
422   PetscCall(PetscFree(mat->data));
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
427 {
428   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
429   PetscScalar  *xx, *yy;
430   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
431   Mat          *aa;
432 
433   PetscFunctionBegin;
434   /*
435      Standard CSR multiply except each entry is a Mat
436   */
437   PetscCall(VecGetArray(x, &xx));
438 
439   PetscCall(VecSet(y, 0.0));
440   PetscCall(VecGetArray(y, &yy));
441   aj = bmat->j;
442   aa = bmat->a;
443   ii = bmat->i;
444   for (i = 0; i < m; i++) {
445     jrow = ii[i];
446     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
447     n = ii[i + 1] - jrow;
448     for (j = 0; j < n; j++) {
449       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
450       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
451       PetscCall(VecResetArray(bmat->right));
452       jrow++;
453     }
454     PetscCall(VecResetArray(bmat->left));
455   }
456   PetscCall(VecRestoreArray(x, &xx));
457   PetscCall(VecRestoreArray(y, &yy));
458   PetscFunctionReturn(PETSC_SUCCESS);
459 }
460 
461 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
462 {
463   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
464   PetscScalar  *xx, *yy;
465   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
466   Mat          *aa;
467 
468   PetscFunctionBegin;
469   /*
470      Standard CSR multiply except each entry is a Mat
471   */
472   PetscCall(VecGetArray(x, &xx));
473 
474   PetscCall(VecSet(y, 0.0));
475   PetscCall(VecGetArray(y, &yy));
476   aj = bmat->j;
477   aa = bmat->a;
478   ii = bmat->i;
479   for (i = 0; i < m; i++) {
480     jrow = ii[i];
481     n    = ii[i + 1] - jrow;
482     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
483     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
484     /* if we ALWAYS required a diagonal entry then could remove this if test */
485     if (aj[jrow] == i) {
486       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
487       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
488       PetscCall(VecResetArray(bmat->right));
489       jrow++;
490       n--;
491     }
492     for (j = 0; j < n; j++) {
493       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
494       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
495       PetscCall(VecResetArray(bmat->right));
496 
497       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
498       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
499       PetscCall(VecResetArray(bmat->right));
500       jrow++;
501     }
502     PetscCall(VecResetArray(bmat->left));
503     PetscCall(VecResetArray(bmat->middle));
504   }
505   PetscCall(VecRestoreArray(x, &xx));
506   PetscCall(VecRestoreArray(y, &yy));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
511 {
512   PetscFunctionBegin;
513   PetscFunctionReturn(PETSC_SUCCESS);
514 }
515 
516 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
517 {
518   PetscFunctionBegin;
519   PetscFunctionReturn(PETSC_SUCCESS);
520 }
521 
522 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
523 {
524   PetscFunctionBegin;
525   PetscFunctionReturn(PETSC_SUCCESS);
526 }
527 
528 /*
529      Adds diagonal pointers to sparse matrix structure.
530 */
531 static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
532 {
533   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
534   PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;
535 
536   PetscFunctionBegin;
537   if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
538   for (i = 0; i < mbs; i++) {
539     a->diag[i] = a->i[i + 1];
540     for (j = a->i[i]; j < a->i[i + 1]; j++) {
541       if (a->j[j] == i) {
542         a->diag[i] = j;
543         break;
544       }
545     }
546   }
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
551 {
552   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
553   Mat_SeqAIJ   *c;
554   PetscInt      i, k, first, step, lensi, nrows, ncols;
555   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
556   PetscScalar  *a_new;
557   Mat           C, *aa = a->a;
558   PetscBool     stride, equal;
559 
560   PetscFunctionBegin;
561   PetscCall(ISEqual(isrow, iscol, &equal));
562   PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
563   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
564   PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
565   PetscCall(ISStrideGetInfo(iscol, &first, &step));
566   PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");
567 
568   PetscCall(ISGetLocalSize(isrow, &nrows));
569   ncols = nrows;
570 
571   /* create submatrix */
572   if (scall == MAT_REUSE_MATRIX) {
573     PetscInt n_cols, n_rows;
574     C = *B;
575     PetscCall(MatGetSize(C, &n_rows, &n_cols));
576     PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
577     PetscCall(MatZeroEntries(C));
578   } else {
579     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
580     PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
581     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
582     else PetscCall(MatSetType(C, MATSEQAIJ));
583     PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
584     PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
585   }
586   c = (Mat_SeqAIJ *)C->data;
587 
588   /* loop over rows inserting into submatrix */
589   a_new = c->a;
590   j_new = c->j;
591   i_new = c->i;
592 
593   for (i = 0; i < nrows; i++) {
594     lensi = ailen[i];
595     for (k = 0; k < lensi; k++) {
596       *j_new++ = *aj++;
597       PetscCall(MatGetValue(*aa++, first, first, a_new++));
598     }
599     i_new[i + 1] = i_new[i] + lensi;
600     c->ilen[i]   = lensi;
601   }
602 
603   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
604   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
605   *B = C;
606   PetscFunctionReturn(PETSC_SUCCESS);
607 }
608 
609 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
610 {
611   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
612   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
613   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
614   Mat          *aa = a->a, *ap;
615 
616   PetscFunctionBegin;
617   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
618 
619   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
620   for (i = 1; i < m; i++) {
621     /* move each row back by the amount of empty slots (fshift) before it*/
622     fshift += imax[i - 1] - ailen[i - 1];
623     rmax = PetscMax(rmax, ailen[i]);
624     if (fshift) {
625       ip = aj + ai[i];
626       ap = aa + ai[i];
627       N  = ailen[i];
628       for (j = 0; j < N; j++) {
629         ip[j - fshift] = ip[j];
630         ap[j - fshift] = ap[j];
631       }
632     }
633     ai[i] = ai[i - 1] + ailen[i - 1];
634   }
635   if (m) {
636     fshift += imax[m - 1] - ailen[m - 1];
637     ai[m] = ai[m - 1] + ailen[m - 1];
638   }
639   /* reset ilen and imax for each row */
640   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
641   a->nz = ai[m];
642   for (i = 0; i < a->nz; i++) {
643     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
644     PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
645     PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
646   }
647   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
648   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
649   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));
650 
651   A->info.mallocs += a->reallocs;
652   a->reallocs         = 0;
653   A->info.nz_unneeded = (double)fshift;
654   a->rmax             = rmax;
655   PetscCall(MatMarkDiagonal_BlockMat(A));
656   PetscFunctionReturn(PETSC_SUCCESS);
657 }
658 
659 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
660 {
661   PetscFunctionBegin;
662   if (opt == MAT_SYMMETRIC && flg) {
663     A->ops->sor  = MatSOR_BlockMat_Symmetric;
664     A->ops->mult = MatMult_BlockMat_Symmetric;
665   } else {
666     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
667   }
668   PetscFunctionReturn(PETSC_SUCCESS);
669 }
670 
671 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
672                                        NULL,
673                                        NULL,
674                                        MatMult_BlockMat,
675                                        /*  4*/ MatMultAdd_BlockMat,
676                                        MatMultTranspose_BlockMat,
677                                        MatMultTransposeAdd_BlockMat,
678                                        NULL,
679                                        NULL,
680                                        NULL,
681                                        /* 10*/ NULL,
682                                        NULL,
683                                        NULL,
684                                        MatSOR_BlockMat,
685                                        NULL,
686                                        /* 15*/ NULL,
687                                        NULL,
688                                        NULL,
689                                        NULL,
690                                        NULL,
691                                        /* 20*/ NULL,
692                                        MatAssemblyEnd_BlockMat,
693                                        MatSetOption_BlockMat,
694                                        NULL,
695                                        /* 24*/ NULL,
696                                        NULL,
697                                        NULL,
698                                        NULL,
699                                        NULL,
700                                        /* 29*/ NULL,
701                                        NULL,
702                                        NULL,
703                                        NULL,
704                                        NULL,
705                                        /* 34*/ NULL,
706                                        NULL,
707                                        NULL,
708                                        NULL,
709                                        NULL,
710                                        /* 39*/ NULL,
711                                        NULL,
712                                        NULL,
713                                        NULL,
714                                        NULL,
715                                        /* 44*/ NULL,
716                                        NULL,
717                                        MatShift_Basic,
718                                        NULL,
719                                        NULL,
720                                        /* 49*/ NULL,
721                                        NULL,
722                                        NULL,
723                                        NULL,
724                                        NULL,
725                                        /* 54*/ NULL,
726                                        NULL,
727                                        NULL,
728                                        NULL,
729                                        NULL,
730                                        /* 59*/ MatCreateSubMatrix_BlockMat,
731                                        MatDestroy_BlockMat,
732                                        MatView_BlockMat,
733                                        NULL,
734                                        NULL,
735                                        /* 64*/ NULL,
736                                        NULL,
737                                        NULL,
738                                        NULL,
739                                        NULL,
740                                        /* 69*/ NULL,
741                                        NULL,
742                                        NULL,
743                                        NULL,
744                                        NULL,
745                                        /* 74*/ NULL,
746                                        NULL,
747                                        NULL,
748                                        NULL,
749                                        NULL,
750                                        /* 79*/ NULL,
751                                        NULL,
752                                        NULL,
753                                        NULL,
754                                        MatLoad_BlockMat,
755                                        /* 84*/ NULL,
756                                        NULL,
757                                        NULL,
758                                        NULL,
759                                        NULL,
760                                        /* 89*/ NULL,
761                                        NULL,
762                                        NULL,
763                                        NULL,
764                                        NULL,
765                                        /* 94*/ NULL,
766                                        NULL,
767                                        NULL,
768                                        NULL,
769                                        NULL,
770                                        /* 99*/ NULL,
771                                        NULL,
772                                        NULL,
773                                        NULL,
774                                        NULL,
775                                        /*104*/ NULL,
776                                        NULL,
777                                        NULL,
778                                        NULL,
779                                        NULL,
780                                        /*109*/ NULL,
781                                        NULL,
782                                        NULL,
783                                        NULL,
784                                        NULL,
785                                        /*114*/ NULL,
786                                        NULL,
787                                        NULL,
788                                        NULL,
789                                        NULL,
790                                        /*119*/ NULL,
791                                        NULL,
792                                        NULL,
793                                        NULL,
794                                        NULL,
795                                        /*124*/ NULL,
796                                        NULL,
797                                        NULL,
798                                        NULL,
799                                        NULL,
800                                        /*129*/ NULL,
801                                        NULL,
802                                        NULL,
803                                        NULL,
804                                        NULL,
805                                        /*134*/ NULL,
806                                        NULL,
807                                        NULL,
808                                        NULL,
809                                        NULL,
810                                        /*139*/ NULL,
811                                        NULL,
812                                        NULL,
813                                        NULL,
814                                        NULL,
815                                        /*144*/ NULL,
816                                        NULL,
817                                        NULL,
818                                        NULL,
819                                        NULL,
820                                        NULL,
821                                        /*150*/ NULL,
822                                        NULL,
823                                        NULL};
824 
825 /*@C
826   MatBlockMatSetPreallocation - For good matrix assembly performance
827   the user should preallocate the matrix storage by setting the parameter nz
828   (or the array nnz).  By setting these parameters accurately, performance
829   during matrix assembly can be increased by more than a factor of 50.
830 
831   Collective
832 
833   Input Parameters:
834 + B   - The matrix
835 . bs  - size of each block in matrix
836 . nz  - number of nonzeros per block row (same for all rows)
837 - nnz - array containing the number of nonzeros in the various block rows
838          (possibly different for each row) or `NULL`
839 
840   Level: intermediate
841 
842   Notes:
843   If `nnz` is given then `nz` is ignored
844 
845   Specify the preallocated storage with either `nz` or `nnz` (not both).
846   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
847   allocation.
848 
849 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
850 @*/
851 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
852 {
853   PetscFunctionBegin;
854   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
855   PetscFunctionReturn(PETSC_SUCCESS);
856 }
857 
858 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
859 {
860   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
861   PetscInt      i;
862 
863   PetscFunctionBegin;
864   PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
865   PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
866   PetscCall(PetscLayoutSetUp(A->rmap));
867   PetscCall(PetscLayoutSetUp(A->cmap));
868   PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));
869 
870   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
871   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
872   if (nnz) {
873     for (i = 0; i < A->rmap->n / bs; i++) {
874       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
875       PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
876     }
877   }
878   bmat->mbs = A->rmap->n / bs;
879 
880   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
881   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
882   PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));
883 
884   if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
885   if (PetscLikely(nnz)) {
886     nz = 0;
887     for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
888       bmat->imax[i] = nnz[i];
889       nz += nnz[i];
890     }
891   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
892 
893   /* bmat->ilen will count nonzeros in each row so far. */
894   PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));
895 
896   /* allocate the matrix space */
897   PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
898   PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
899   bmat->i[0] = 0;
900   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
901   bmat->singlemalloc = PETSC_TRUE;
902   bmat->free_a       = PETSC_TRUE;
903   bmat->free_ij      = PETSC_TRUE;
904 
905   bmat->nz            = 0;
906   bmat->maxnz         = nz;
907   A->info.nz_unneeded = (double)bmat->maxnz;
908   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 /*MC
913    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
914                  consisting of (usually) sparse blocks.
915 
916   Level: advanced
917 
918 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
919 M*/
920 
921 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
922 {
923   Mat_BlockMat *b;
924 
925   PetscFunctionBegin;
926   PetscCall(PetscNew(&b));
927   A->data         = (void *)b;
928   A->ops[0]       = MatOps_Values;
929   A->assembled    = PETSC_TRUE;
930   A->preallocated = PETSC_FALSE;
931   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
932 
933   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
934   PetscFunctionReturn(PETSC_SUCCESS);
935 }
936 
937 /*@C
938   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object
939 
940   Collective
941 
942   Input Parameters:
943 + comm - MPI communicator
944 . m    - number of rows
945 . n    - number of columns
946 . bs   - size of each submatrix
947 . nz   - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
948 - nnz  - expected number of nonzers per block row if known (use `NULL` otherwise)
949 
950   Output Parameter:
951 . A - the matrix
952 
953   Level: intermediate
954 
955   Notes:
956   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
957   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
958 
959   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
960 
961   Developer Notes:
962   I don't like the name, it is not `MATNESTMAT`
963 
964 .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
965 @*/
966 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
967 {
968   PetscFunctionBegin;
969   PetscCall(MatCreate(comm, A));
970   PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
971   PetscCall(MatSetType(*A, MATBLOCKMAT));
972   PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
973   PetscFunctionReturn(PETSC_SUCCESS);
974 }
975