xref: /petsc/src/mat/impls/blockmat/seq/blockmat.c (revision 5b12497f4ab2fae16b3dbcff7d3c608aa116b75c)
1 /*
2    This provides a matrix that consists of Mats
3 */
4 
5 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */
7 
8 typedef struct {
9   SEQAIJHEADER(Mat);
10   SEQBAIJHEADER;
11   Mat *diags;
12 
13   Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
14 } Mat_BlockMat;
15 
16 MatGetDiagonalMarkers(BlockMat, A->rmap->bs)
17 
MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)18 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
19 {
20   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
21   PetscScalar       *x;
22   const Mat         *v;
23   const PetscScalar *b;
24   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
25   const PetscInt    *idx;
26   IS                 row, col;
27   MatFactorInfo      info;
28   Vec                left = a->left, right = a->right, middle = a->middle;
29   Mat               *diag;
30 
31   PetscFunctionBegin;
32   its = its * lits;
33   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
34   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
35   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
36   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
37   PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
38 
39   if (!a->diags) {
40     const PetscInt *adiag;
41 
42     PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
43     PetscCall(PetscMalloc1(mbs, &a->diags));
44     PetscCall(MatFactorInfoInitialize(&info));
45     for (i = 0; i < mbs; i++) {
46       PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
47       PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[adiag[i]], row, &info));
48       PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
49       PetscCall(ISDestroy(&row));
50       PetscCall(ISDestroy(&col));
51     }
52     PetscCall(VecDuplicate(bb, &a->workb));
53   }
54   diag = a->diags;
55 
56   PetscCall(VecSet(xx, 0.0));
57   PetscCall(VecGetArray(xx, &x));
58   /* copy right-hand side because it must be modified during iteration */
59   PetscCall(VecCopy(bb, a->workb));
60   PetscCall(VecGetArrayRead(a->workb, &b));
61 
62   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
63   while (its--) {
64     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
65       for (i = 0; i < mbs; i++) {
66         n   = a->i[i + 1] - a->i[i] - 1;
67         idx = a->j + a->i[i] + 1;
68         v   = a->a + a->i[i] + 1;
69 
70         PetscCall(VecSet(left, 0.0));
71         for (j = 0; j < n; j++) {
72           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
73           PetscCall(MatMultAdd(v[j], right, left, left));
74           PetscCall(VecResetArray(right));
75         }
76         PetscCall(VecPlaceArray(right, b + i * bs));
77         PetscCall(VecAYPX(left, -1.0, right));
78         PetscCall(VecResetArray(right));
79 
80         PetscCall(VecPlaceArray(right, x + i * bs));
81         PetscCall(MatSolve(diag[i], left, right));
82 
83         /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
84         for (j = 0; j < n; j++) {
85           PetscCall(MatMultTranspose(v[j], right, left));
86           PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
87           PetscCall(VecAXPY(middle, -1.0, left));
88           PetscCall(VecResetArray(middle));
89         }
90         PetscCall(VecResetArray(right));
91       }
92     }
93     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
94       for (i = mbs - 1; i >= 0; i--) {
95         n   = a->i[i + 1] - a->i[i] - 1;
96         idx = a->j + a->i[i] + 1;
97         v   = a->a + a->i[i] + 1;
98 
99         PetscCall(VecSet(left, 0.0));
100         for (j = 0; j < n; j++) {
101           PetscCall(VecPlaceArray(right, x + idx[j] * bs));
102           PetscCall(MatMultAdd(v[j], right, left, left));
103           PetscCall(VecResetArray(right));
104         }
105         PetscCall(VecPlaceArray(right, b + i * bs));
106         PetscCall(VecAYPX(left, -1.0, right));
107         PetscCall(VecResetArray(right));
108 
109         PetscCall(VecPlaceArray(right, x + i * bs));
110         PetscCall(MatSolve(diag[i], left, right));
111         PetscCall(VecResetArray(right));
112       }
113     }
114   }
115   PetscCall(VecRestoreArray(xx, &x));
116   PetscCall(VecRestoreArrayRead(a->workb, &b));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
MatSOR_BlockMat(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)120 static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
121 {
122   Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
123   PetscScalar       *x;
124   const Mat         *v;
125   const PetscScalar *b;
126   PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
127   const PetscInt    *idx;
128   IS                 row, col;
129   MatFactorInfo      info;
130   Vec                left = a->left, right = a->right;
131   Mat               *diag;
132 
133   PetscFunctionBegin;
134   its = its * lits;
135   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
136   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
137   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
138   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
139 
140   if (!a->diags) {
141     const PetscInt *adiag;
142 
143     PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
144     PetscCall(PetscMalloc1(mbs, &a->diags));
145     PetscCall(MatFactorInfoInitialize(&info));
146     for (i = 0; i < mbs; i++) {
147       PetscCall(MatGetOrdering(a->a[adiag[i]], MATORDERINGND, &row, &col));
148       PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[adiag[i]], row, col, &info));
149       PetscCall(MatLUFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
150       PetscCall(ISDestroy(&row));
151       PetscCall(ISDestroy(&col));
152     }
153   }
154   diag = a->diags;
155 
156   PetscCall(VecSet(xx, 0.0));
157   PetscCall(VecGetArray(xx, &x));
158   PetscCall(VecGetArrayRead(bb, &b));
159 
160   /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
161   while (its--) {
162     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
163       for (i = 0; i < mbs; i++) {
164         n   = a->i[i + 1] - a->i[i];
165         idx = a->j + a->i[i];
166         v   = a->a + a->i[i];
167 
168         PetscCall(VecSet(left, 0.0));
169         for (j = 0; j < n; j++) {
170           if (idx[j] != i) {
171             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
172             PetscCall(MatMultAdd(v[j], right, left, left));
173             PetscCall(VecResetArray(right));
174           }
175         }
176         PetscCall(VecPlaceArray(right, b + i * bs));
177         PetscCall(VecAYPX(left, -1.0, right));
178         PetscCall(VecResetArray(right));
179 
180         PetscCall(VecPlaceArray(right, x + i * bs));
181         PetscCall(MatSolve(diag[i], left, right));
182         PetscCall(VecResetArray(right));
183       }
184     }
185     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
186       for (i = mbs - 1; i >= 0; i--) {
187         n   = a->i[i + 1] - a->i[i];
188         idx = a->j + a->i[i];
189         v   = a->a + a->i[i];
190 
191         PetscCall(VecSet(left, 0.0));
192         for (j = 0; j < n; j++) {
193           if (idx[j] != i) {
194             PetscCall(VecPlaceArray(right, x + idx[j] * bs));
195             PetscCall(MatMultAdd(v[j], right, left, left));
196             PetscCall(VecResetArray(right));
197           }
198         }
199         PetscCall(VecPlaceArray(right, b + i * bs));
200         PetscCall(VecAYPX(left, -1.0, right));
201         PetscCall(VecResetArray(right));
202 
203         PetscCall(VecPlaceArray(right, x + i * bs));
204         PetscCall(MatSolve(diag[i], left, right));
205         PetscCall(VecResetArray(right));
206       }
207     }
208   }
209   PetscCall(VecRestoreArray(xx, &x));
210   PetscCall(VecRestoreArrayRead(bb, &b));
211   PetscFunctionReturn(PETSC_SUCCESS);
212 }
213 
MatSetValues_BlockMat(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)214 static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
215 {
216   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
217   PetscInt     *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1;
218   PetscInt     *ai = a->i, *ailen = a->ilen;
219   PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
220   PetscInt      ridx, cidx;
221   PetscBool     roworiented = a->roworiented;
222   MatScalar     value;
223   Mat          *ap, *aa = a->a;
224 
225   PetscFunctionBegin;
226   for (k = 0; k < m; k++) { /* loop over added rows */
227     row  = im[k];
228     brow = row / bs;
229     if (row < 0) continue;
230     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
231     rp   = aj + ai[brow];
232     ap   = aa + ai[brow];
233     nrow = ailen[brow];
234     low  = 0;
235     high = nrow;
236     for (l = 0; l < n; l++) { /* loop over added columns */
237       if (in[l] < 0) continue;
238       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
239       col  = in[l];
240       bcol = col / bs;
241       if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
242       ridx = row % bs;
243       cidx = col % bs;
244       if (roworiented) value = v[l + k * n];
245       else value = v[k + l * m];
246 
247       if (col <= lastcol) low = 0;
248       else high = nrow;
249       lastcol = col;
250       while (high - low > 7) {
251         t = (low + high) / 2;
252         if (rp[t] > bcol) high = t;
253         else low = t;
254       }
255       for (i = low; i < high; i++) {
256         if (rp[i] > bcol) break;
257         if (rp[i] == bcol) goto noinsert1;
258       }
259       if (nonew == 1) goto noinsert1;
260       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col);
261 #if 0
262       MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
263       N = nrow++ - 1;
264       high++;
265       /* shift up all the later entries in this row */
266       for (ii = N; ii >= i; ii--) {
267         rp[ii + 1] = rp[ii];
268         ap[ii + 1] = ap[ii];
269       }
270       if (N >= i) ap[i] = NULL;
271       rp[i] = bcol;
272       a->nz++;
273 #endif
274     noinsert1:;
275       if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
276       PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
277       low = i;
278     }
279     ailen[brow] = nrow;
280   }
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
MatLoad_BlockMat(Mat newmat,PetscViewer viewer)284 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
285 {
286   Mat                tmpA;
287   PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
288   const PetscInt    *cols;
289   const PetscScalar *values;
290   PetscBool          flg = PETSC_FALSE, notdone;
291   Mat_SeqAIJ        *a;
292   Mat_BlockMat      *amat;
293 
294   PetscFunctionBegin;
295   /* force binary viewer to load .info file if it has not yet done so */
296   PetscCall(PetscViewerSetUp(viewer));
297   PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
298   PetscCall(MatSetType(tmpA, MATSEQAIJ));
299   PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
300 
301   PetscCall(MatGetLocalSize(tmpA, &m, &n));
302   PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
303   PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
304   PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
305   PetscOptionsEnd();
306 
307   /* Determine number of nonzero blocks for each block row */
308   a   = (Mat_SeqAIJ *)tmpA->data;
309   mbs = m / bs;
310   PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
311   PetscCall(PetscArrayzero(lens, mbs));
312 
313   for (i = 0; i < mbs; i++) {
314     for (j = 0; j < bs; j++) {
315       ii[j]    = a->j + a->i[i * bs + j];
316       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
317     }
318 
319     currentcol = -1;
320     while (PETSC_TRUE) {
321       notdone = PETSC_FALSE;
322       nextcol = 1000000000;
323       for (j = 0; j < bs; j++) {
324         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
325           ii[j]++;
326           ilens[j]--;
327         }
328         if (ilens[j] > 0) {
329           notdone = PETSC_TRUE;
330           nextcol = PetscMin(nextcol, ii[j][0] / bs);
331         }
332       }
333       if (!notdone) break;
334       if (!flg || (nextcol >= i)) lens[i]++;
335       currentcol = nextcol;
336     }
337   }
338 
339   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
340   PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
341   if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
342   amat = (Mat_BlockMat *)newmat->data;
343 
344   /* preallocate the submatrices */
345   PetscCall(PetscMalloc1(bs, &llens));
346   for (i = 0; i < mbs; i++) { /* loops for block rows */
347     for (j = 0; j < bs; j++) {
348       ii[j]    = a->j + a->i[i * bs + j];
349       ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
350     }
351 
352     currentcol = 1000000000;
353     for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
354       if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
355     }
356 
357     while (PETSC_TRUE) { /* loops over blocks in block row */
358       notdone = PETSC_FALSE;
359       nextcol = 1000000000;
360       PetscCall(PetscArrayzero(llens, bs));
361       for (j = 0; j < bs; j++) {                              /* loop over rows in block */
362         while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
363           ii[j]++;
364           ilens[j]--;
365           llens[j]++;
366         }
367         if (ilens[j] > 0) {
368           notdone = PETSC_TRUE;
369           nextcol = PetscMin(nextcol, ii[j][0] / bs);
370         }
371       }
372       PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
373       if (!flg || currentcol >= i) {
374         amat->j[cnt] = currentcol;
375         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
376       }
377 
378       if (!notdone) break;
379       currentcol = nextcol;
380     }
381     amat->ilen[i] = lens[i];
382   }
383 
384   PetscCall(PetscFree3(lens, ii, ilens));
385   PetscCall(PetscFree(llens));
386 
387   /* copy over the matrix, one row at a time */
388   for (i = 0; i < m; i++) {
389     PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
390     PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
391     PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
392   }
393   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
394   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
395   PetscFunctionReturn(PETSC_SUCCESS);
396 }
397 
MatView_BlockMat(Mat A,PetscViewer viewer)398 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
399 {
400   Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
401   const char       *name;
402   PetscViewerFormat format;
403 
404   PetscFunctionBegin;
405   PetscCall(PetscObjectGetName((PetscObject)A, &name));
406   PetscCall(PetscViewerGetFormat(viewer, &format));
407   if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
408     PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
409     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
410   }
411   PetscFunctionReturn(PETSC_SUCCESS);
412 }
413 
MatDestroy_BlockMat(Mat mat)414 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
415 {
416   Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
417   PetscInt      i;
418 
419   PetscFunctionBegin;
420   PetscCall(VecDestroy(&bmat->right));
421   PetscCall(VecDestroy(&bmat->left));
422   PetscCall(VecDestroy(&bmat->middle));
423   PetscCall(VecDestroy(&bmat->workb));
424   if (bmat->diags) {
425     for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
426   }
427   if (bmat->a) {
428     for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
429   }
430   PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
431   PetscCall(PetscFree(mat->data));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
MatMult_BlockMat(Mat A,Vec x,Vec y)435 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
436 {
437   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
438   PetscScalar  *xx, *yy;
439   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
440   Mat          *aa;
441 
442   PetscFunctionBegin;
443   /*
444      Standard CSR multiply except each entry is a Mat
445   */
446   PetscCall(VecGetArray(x, &xx));
447 
448   PetscCall(VecSet(y, 0.0));
449   PetscCall(VecGetArray(y, &yy));
450   aj = bmat->j;
451   aa = bmat->a;
452   ii = bmat->i;
453   for (i = 0; i < m; i++) {
454     jrow = ii[i];
455     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
456     n = ii[i + 1] - jrow;
457     for (j = 0; j < n; j++) {
458       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
459       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
460       PetscCall(VecResetArray(bmat->right));
461       jrow++;
462     }
463     PetscCall(VecResetArray(bmat->left));
464   }
465   PetscCall(VecRestoreArray(x, &xx));
466   PetscCall(VecRestoreArray(y, &yy));
467   PetscFunctionReturn(PETSC_SUCCESS);
468 }
469 
MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)470 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
471 {
472   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
473   PetscScalar  *xx, *yy;
474   PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
475   Mat          *aa;
476 
477   PetscFunctionBegin;
478   /*
479      Standard CSR multiply except each entry is a Mat
480   */
481   PetscCall(VecGetArray(x, &xx));
482 
483   PetscCall(VecSet(y, 0.0));
484   PetscCall(VecGetArray(y, &yy));
485   aj = bmat->j;
486   aa = bmat->a;
487   ii = bmat->i;
488   for (i = 0; i < m; i++) {
489     jrow = ii[i];
490     n    = ii[i + 1] - jrow;
491     PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
492     PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
493     /* if we ALWAYS required a diagonal entry then could remove this if test */
494     if (aj[jrow] == i) {
495       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
496       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
497       PetscCall(VecResetArray(bmat->right));
498       jrow++;
499       n--;
500     }
501     for (j = 0; j < n; j++) {
502       PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
503       PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
504       PetscCall(VecResetArray(bmat->right));
505 
506       PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
507       PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
508       PetscCall(VecResetArray(bmat->right));
509       jrow++;
510     }
511     PetscCall(VecResetArray(bmat->left));
512     PetscCall(VecResetArray(bmat->middle));
513   }
514   PetscCall(VecRestoreArray(x, &xx));
515   PetscCall(VecRestoreArray(y, &yy));
516   PetscFunctionReturn(PETSC_SUCCESS);
517 }
518 
MatMultAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)519 static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
520 {
521   PetscFunctionBegin;
522   PetscFunctionReturn(PETSC_SUCCESS);
523 }
524 
MatMultTranspose_BlockMat(Mat A,Vec x,Vec y)525 static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
526 {
527   PetscFunctionBegin;
528   PetscFunctionReturn(PETSC_SUCCESS);
529 }
530 
MatMultTransposeAdd_BlockMat(Mat A,Vec x,Vec y,Vec z)531 static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
532 {
533   PetscFunctionBegin;
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
MatCreateSubMatrix_BlockMat(Mat A,IS isrow,IS iscol,MatReuse scall,Mat * B)537 static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
538 {
539   Mat_BlockMat *a = (Mat_BlockMat *)A->data;
540   Mat_SeqAIJ   *c;
541   PetscInt      i, k, first, step, lensi, nrows, ncols;
542   PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
543   PetscScalar  *a_new;
544   Mat           C, *aa = a->a;
545   PetscBool     stride, equal;
546 
547   PetscFunctionBegin;
548   PetscCall(ISEqual(isrow, iscol, &equal));
549   PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
550   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
551   PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
552   PetscCall(ISStrideGetInfo(iscol, &first, &step));
553   PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");
554 
555   PetscCall(ISGetLocalSize(isrow, &nrows));
556   ncols = nrows;
557 
558   /* create submatrix */
559   if (scall == MAT_REUSE_MATRIX) {
560     PetscInt n_cols, n_rows;
561     C = *B;
562     PetscCall(MatGetSize(C, &n_rows, &n_cols));
563     PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
564     PetscCall(MatZeroEntries(C));
565   } else {
566     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
567     PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
568     if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
569     else PetscCall(MatSetType(C, MATSEQAIJ));
570     PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
571     PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
572   }
573   c = (Mat_SeqAIJ *)C->data;
574 
575   /* loop over rows inserting into submatrix */
576   a_new = c->a;
577   j_new = c->j;
578   i_new = c->i;
579 
580   for (i = 0; i < nrows; i++) {
581     lensi = ailen[i];
582     for (k = 0; k < lensi; k++) {
583       *j_new++ = *aj++;
584       PetscCall(MatGetValue(*aa++, first, first, a_new++));
585     }
586     i_new[i + 1] = i_new[i] + lensi;
587     c->ilen[i]   = lensi;
588   }
589 
590   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
591   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
592   *B = C;
593   PetscFunctionReturn(PETSC_SUCCESS);
594 }
595 
MatAssemblyEnd_BlockMat(Mat A,MatAssemblyType mode)596 static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
597 {
598   Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
599   PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
600   PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
601   Mat          *aa = a->a, *ap;
602 
603   PetscFunctionBegin;
604   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
605 
606   if (m) rmax = ailen[0]; /* determine row with most nonzeros */
607   for (i = 1; i < m; i++) {
608     /* move each row back by the amount of empty slots (fshift) before it*/
609     fshift += imax[i - 1] - ailen[i - 1];
610     rmax = PetscMax(rmax, ailen[i]);
611     if (fshift) {
612       ip = aj + ai[i];
613       ap = aa + ai[i];
614       N  = ailen[i];
615       for (j = 0; j < N; j++) {
616         ip[j - fshift] = ip[j];
617         ap[j - fshift] = ap[j];
618       }
619     }
620     ai[i] = ai[i - 1] + ailen[i - 1];
621   }
622   if (m) {
623     fshift += imax[m - 1] - ailen[m - 1];
624     ai[m] = ai[m - 1] + ailen[m - 1];
625   }
626   /* reset ilen and imax for each row */
627   for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
628   a->nz = ai[m];
629   for (i = 0; i < a->nz; i++) {
630     PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
631     PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
632     PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
633   }
634   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
635   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
636   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));
637 
638   A->info.mallocs += a->reallocs;
639   a->reallocs         = 0;
640   A->info.nz_unneeded = (double)fshift;
641   a->rmax             = rmax;
642   PetscFunctionReturn(PETSC_SUCCESS);
643 }
644 
MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)645 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
646 {
647   PetscFunctionBegin;
648   if (opt == MAT_SYMMETRIC && flg) {
649     A->ops->sor  = MatSOR_BlockMat_Symmetric;
650     A->ops->mult = MatMult_BlockMat_Symmetric;
651   } else {
652     PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
653   }
654   PetscFunctionReturn(PETSC_SUCCESS);
655 }
656 
657 static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
658                                        NULL,
659                                        NULL,
660                                        MatMult_BlockMat,
661                                        /*  4*/ MatMultAdd_BlockMat,
662                                        MatMultTranspose_BlockMat,
663                                        MatMultTransposeAdd_BlockMat,
664                                        NULL,
665                                        NULL,
666                                        NULL,
667                                        /* 10*/ NULL,
668                                        NULL,
669                                        NULL,
670                                        MatSOR_BlockMat,
671                                        NULL,
672                                        /* 15*/ NULL,
673                                        NULL,
674                                        NULL,
675                                        NULL,
676                                        NULL,
677                                        /* 20*/ NULL,
678                                        MatAssemblyEnd_BlockMat,
679                                        MatSetOption_BlockMat,
680                                        NULL,
681                                        /* 24*/ NULL,
682                                        NULL,
683                                        NULL,
684                                        NULL,
685                                        NULL,
686                                        /* 29*/ NULL,
687                                        NULL,
688                                        NULL,
689                                        NULL,
690                                        NULL,
691                                        /* 34*/ NULL,
692                                        NULL,
693                                        NULL,
694                                        NULL,
695                                        NULL,
696                                        /* 39*/ NULL,
697                                        NULL,
698                                        NULL,
699                                        NULL,
700                                        NULL,
701                                        /* 44*/ NULL,
702                                        NULL,
703                                        MatShift_Basic,
704                                        NULL,
705                                        NULL,
706                                        /* 49*/ NULL,
707                                        NULL,
708                                        NULL,
709                                        NULL,
710                                        NULL,
711                                        /* 54*/ NULL,
712                                        NULL,
713                                        NULL,
714                                        NULL,
715                                        NULL,
716                                        /* 59*/ MatCreateSubMatrix_BlockMat,
717                                        MatDestroy_BlockMat,
718                                        MatView_BlockMat,
719                                        NULL,
720                                        NULL,
721                                        /* 64*/ NULL,
722                                        NULL,
723                                        NULL,
724                                        NULL,
725                                        NULL,
726                                        /* 69*/ NULL,
727                                        NULL,
728                                        NULL,
729                                        NULL,
730                                        NULL,
731                                        /* 74*/ NULL,
732                                        NULL,
733                                        NULL,
734                                        NULL,
735                                        MatLoad_BlockMat,
736                                        /* 79*/ NULL,
737                                        NULL,
738                                        NULL,
739                                        NULL,
740                                        NULL,
741                                        /* 84*/ NULL,
742                                        NULL,
743                                        NULL,
744                                        NULL,
745                                        NULL,
746                                        /* 89*/ NULL,
747                                        NULL,
748                                        NULL,
749                                        NULL,
750                                        NULL,
751                                        /* 94*/ NULL,
752                                        NULL,
753                                        NULL,
754                                        NULL,
755                                        NULL,
756                                        /* 99*/ NULL,
757                                        NULL,
758                                        NULL,
759                                        NULL,
760                                        NULL,
761                                        /*104*/ NULL,
762                                        NULL,
763                                        NULL,
764                                        NULL,
765                                        NULL,
766                                        /*109*/ NULL,
767                                        NULL,
768                                        NULL,
769                                        NULL,
770                                        NULL,
771                                        /*114*/ NULL,
772                                        NULL,
773                                        NULL,
774                                        NULL,
775                                        NULL,
776                                        /*119*/ NULL,
777                                        NULL,
778                                        NULL,
779                                        NULL,
780                                        NULL,
781                                        /*124*/ NULL,
782                                        NULL,
783                                        NULL,
784                                        NULL,
785                                        NULL,
786                                        /*129*/ NULL,
787                                        NULL,
788                                        NULL,
789                                        NULL,
790                                        NULL,
791                                        /*134*/ NULL,
792                                        NULL,
793                                        NULL,
794                                        NULL,
795                                        NULL,
796                                        /*139*/ NULL,
797                                        NULL,
798                                        NULL,
799                                        NULL,
800                                        NULL};
801 
802 /*@C
803   MatBlockMatSetPreallocation - For good matrix assembly performance
804   the user should preallocate the matrix storage by setting the parameter nz
805   (or the array nnz).  By setting these parameters accurately, performance
806   during matrix assembly can be increased by more than a factor of 50.
807 
808   Collective
809 
810   Input Parameters:
811 + B   - The matrix
812 . bs  - size of each block in matrix
813 . nz  - number of nonzeros per block row (same for all rows)
814 - nnz - array containing the number of nonzeros in the various block rows
815          (possibly different for each row) or `NULL`
816 
817   Level: intermediate
818 
819   Notes:
820   If `nnz` is given then `nz` is ignored
821 
822   Specify the preallocated storage with either `nz` or `nnz` (not both).
823   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
824   allocation.
825 
826 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
827 @*/
MatBlockMatSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])828 PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
829 {
830   PetscFunctionBegin;
831   PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
832   PetscFunctionReturn(PETSC_SUCCESS);
833 }
834 
MatBlockMatSetPreallocation_BlockMat(Mat A,PetscInt bs,PetscInt nz,PetscInt * nnz)835 static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
836 {
837   Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
838   PetscInt      i;
839 
840   PetscFunctionBegin;
841   PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
842   PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
843   PetscCall(PetscLayoutSetUp(A->rmap));
844   PetscCall(PetscLayoutSetUp(A->cmap));
845   PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));
846 
847   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
848   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
849   if (nnz) {
850     for (i = 0; i < A->rmap->n / bs; i++) {
851       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
852       PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
853     }
854   }
855   bmat->mbs = A->rmap->n / bs;
856 
857   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
858   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
859   PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));
860 
861   if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
862   PetscCheck(PetscLikely(nnz), PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
863   nz = 0;
864   for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
865     bmat->imax[i] = nnz[i];
866     nz += nnz[i];
867   }
868 
869   /* bmat->ilen will count nonzeros in each row so far. */
870   PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));
871 
872   /* allocate the matrix space */
873   PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
874   PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a));
875   PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j));
876   PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i));
877   bmat->free_a  = PETSC_TRUE;
878   bmat->free_ij = PETSC_TRUE;
879 
880   bmat->i[0] = 0;
881   for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
882 
883   bmat->nz            = 0;
884   bmat->maxnz         = nz;
885   A->info.nz_unneeded = (double)bmat->maxnz;
886   PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
887   PetscFunctionReturn(PETSC_SUCCESS);
888 }
889 
890 /*MC
891    MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
892                  consisting of (usually) sparse blocks.
893 
894   Level: advanced
895 
896 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
897 M*/
898 
MatCreate_BlockMat(Mat A)899 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
900 {
901   Mat_BlockMat *b;
902 
903   PetscFunctionBegin;
904   PetscCall(PetscNew(&b));
905   A->data         = (void *)b;
906   A->ops[0]       = MatOps_Values;
907   A->assembled    = PETSC_TRUE;
908   A->preallocated = PETSC_FALSE;
909   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
910 
911   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
912   PetscFunctionReturn(PETSC_SUCCESS);
913 }
914 
915 /*@C
916   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object
917 
918   Collective
919 
920   Input Parameters:
921 + comm - MPI communicator
922 . m    - number of rows
923 . n    - number of columns
924 . bs   - size of each submatrix
925 . nz   - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
926 - nnz  - expected number of nonzers per block row if known (use `NULL` otherwise)
927 
928   Output Parameter:
929 . A - the matrix
930 
931   Level: intermediate
932 
933   Notes:
934   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
935   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.
936 
937   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
938 
939   Developer Notes:
940   I don't like the name, it is not `MATNESTMAT`
941 
942 .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
943 @*/
MatCreateBlockMat(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt bs,PetscInt nz,PetscInt * nnz,Mat * A)944 PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
945 {
946   PetscFunctionBegin;
947   PetscCall(MatCreate(comm, A));
948   PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
949   PetscCall(MatSetType(*A, MATBLOCKMAT));
950   PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
951   PetscFunctionReturn(PETSC_SUCCESS);
952 }
953