1 /*
2 This provides a matrix that consists of Mats
3 */
4
5 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
6 #include <../src/mat/impls/baij/seq/baij.h> /* use the common AIJ data-structure */
7
/* Data for a matrix stored in CSR form where each entry is a (usually sparse) sequential Mat */
typedef struct {
  SEQAIJHEADER(Mat); /* common CSR bookkeeping (i, j, ilen, imax, nz, ...) with Mat-valued entries */
  SEQBAIJHEADER;
  Mat *diags; /* factored diagonal blocks; built lazily by the SOR routines */

  Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
} Mat_BlockMat;

/* generates MatGetDiagonalMarkers_BlockMat(), which provides the locations of the diagonal blocks within a->a */
MatGetDiagonalMarkers(BlockMat, A->rmap->bs)
17
MatSOR_BlockMat_Symmetric(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)18 static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
19 {
20 Mat_BlockMat *a = (Mat_BlockMat *)A->data;
21 PetscScalar *x;
22 const Mat *v;
23 const PetscScalar *b;
24 PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
25 const PetscInt *idx;
26 IS row, col;
27 MatFactorInfo info;
28 Vec left = a->left, right = a->right, middle = a->middle;
29 Mat *diag;
30
31 PetscFunctionBegin;
32 its = its * lits;
33 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
34 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
35 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
36 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
37 PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
38
39 if (!a->diags) {
40 const PetscInt *adiag;
41
42 PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
43 PetscCall(PetscMalloc1(mbs, &a->diags));
44 PetscCall(MatFactorInfoInitialize(&info));
45 for (i = 0; i < mbs; i++) {
46 PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
47 PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[adiag[i]], row, &info));
48 PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
49 PetscCall(ISDestroy(&row));
50 PetscCall(ISDestroy(&col));
51 }
52 PetscCall(VecDuplicate(bb, &a->workb));
53 }
54 diag = a->diags;
55
56 PetscCall(VecSet(xx, 0.0));
57 PetscCall(VecGetArray(xx, &x));
58 /* copy right-hand side because it must be modified during iteration */
59 PetscCall(VecCopy(bb, a->workb));
60 PetscCall(VecGetArrayRead(a->workb, &b));
61
62 /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
63 while (its--) {
64 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
65 for (i = 0; i < mbs; i++) {
66 n = a->i[i + 1] - a->i[i] - 1;
67 idx = a->j + a->i[i] + 1;
68 v = a->a + a->i[i] + 1;
69
70 PetscCall(VecSet(left, 0.0));
71 for (j = 0; j < n; j++) {
72 PetscCall(VecPlaceArray(right, x + idx[j] * bs));
73 PetscCall(MatMultAdd(v[j], right, left, left));
74 PetscCall(VecResetArray(right));
75 }
76 PetscCall(VecPlaceArray(right, b + i * bs));
77 PetscCall(VecAYPX(left, -1.0, right));
78 PetscCall(VecResetArray(right));
79
80 PetscCall(VecPlaceArray(right, x + i * bs));
81 PetscCall(MatSolve(diag[i], left, right));
82
83 /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
84 for (j = 0; j < n; j++) {
85 PetscCall(MatMultTranspose(v[j], right, left));
86 PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
87 PetscCall(VecAXPY(middle, -1.0, left));
88 PetscCall(VecResetArray(middle));
89 }
90 PetscCall(VecResetArray(right));
91 }
92 }
93 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
94 for (i = mbs - 1; i >= 0; i--) {
95 n = a->i[i + 1] - a->i[i] - 1;
96 idx = a->j + a->i[i] + 1;
97 v = a->a + a->i[i] + 1;
98
99 PetscCall(VecSet(left, 0.0));
100 for (j = 0; j < n; j++) {
101 PetscCall(VecPlaceArray(right, x + idx[j] * bs));
102 PetscCall(MatMultAdd(v[j], right, left, left));
103 PetscCall(VecResetArray(right));
104 }
105 PetscCall(VecPlaceArray(right, b + i * bs));
106 PetscCall(VecAYPX(left, -1.0, right));
107 PetscCall(VecResetArray(right));
108
109 PetscCall(VecPlaceArray(right, x + i * bs));
110 PetscCall(MatSolve(diag[i], left, right));
111 PetscCall(VecResetArray(right));
112 }
113 }
114 }
115 PetscCall(VecRestoreArray(xx, &x));
116 PetscCall(VecRestoreArrayRead(a->workb, &b));
117 PetscFunctionReturn(PETSC_SUCCESS);
118 }
119
/*
   SOR/Gauss-Seidel relaxation for the nonsymmetric block format. Supports only
   omega == 1.0 and fshift == 0.0. On first use each diagonal block is LU factored
   and the factors are cached in a->diags.
*/
static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

  if (!a->diags) {
    const PetscInt *adiag;

    /* adiag[i] is the location of block row i's diagonal block within a->a */
    PetscCall(MatGetDiagonalMarkers_BlockMat(A, &adiag, NULL));
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[adiag[i]], MATORDERINGND, &row, &col));
      /* NOTE(review): a->diags[i] is not created before the symbolic factorization -- confirm
         that MatLUFactorSymbolic() accepts this, or a MatGetFactor() call is missing */
      PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[adiag[i]], row, col, &info));
      PetscCall(MatLUFactorNumeric(a->diags[i], a->a[adiag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i]; /* number of blocks in block row i */
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        /* left = sum over off-diagonal blocks of A_ij x_j; right/left borrow raw
           array segments via VecPlaceArray to avoid copies */
        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) { /* skip the diagonal block */
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        /* x_i = D_i^{-1} (b_i - sum) */
        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      /* same as the forward sweep but iterating block rows in reverse order */
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
213
/*
   Inserts scalar values into the blocks: each (row, col) pair is mapped to block
   (row/bs, col/bs) and entry (row%bs, col%bs) within that block. The column list
   of each block row is searched (binary then linear) for the target block; inserting
   into a block location that was not preallocated is currently an error.
*/
static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt     *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1;
  PetscInt     *ai = a->i, *ailen = a->ilen;
  PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
  PetscInt      ridx, cidx;
  PetscBool     roworiented = a->roworiented;
  MatScalar     value;
  Mat          *ap, *aa = a->a;

  PetscFunctionBegin;
  for (k = 0; k < m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row / bs;
    if (row < 0) continue; /* negative rows are silently ignored */
    PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    rp   = aj + ai[brow]; /* block-column indices of block row brow */
    ap   = aa + ai[brow]; /* Mat entries of block row brow */
    nrow = ailen[brow];   /* number of blocks currently in this block row */
    low  = 0;
    high = nrow;
    for (l = 0; l < n; l++) { /* loop over added columns */
      if (in[l] < 0) continue; /* negative columns are silently ignored */
      PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
      col  = in[l];
      bcol = col / bs;
      /* for symmetric storage keep only the upper triangular blocks */
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs; /* position within the block */
      cidx = col % bs;
      if (roworiented) value = v[l + k * n];
      else value = v[k + l * m];

      /* reuse the previous search window when columns arrive in increasing order */
      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      /* binary search narrows [low, high) to at most 7 entries, then linear scan */
      while (high - low > 7) {
        t = (low + high) / 2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }
      for (i = low; i < high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1; /* block already present */
      }
      if (nonew == 1) goto noinsert1; /* MAT_NEW_NONZERO_LOCATIONS disabled: drop the value */
      /* inserting a brand-new block is not supported; the reallocation code below is disabled */
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col);
#if 0
      MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
      N = nrow++ - 1;
      high++;
      /* shift up all the later entries in this row */
      for (ii = N; ii >= i; ii--) {
        rp[ii + 1] = rp[ii];
        ap[ii + 1] = ap[ii];
      }
      if (N >= i) ap[i] = NULL;
      rp[i] = bcol;
      a->nz++;
#endif
    noinsert1:;
      /* create the block on first touch, then delegate to MatSetValues on the block */
      if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
      PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
      low = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
283
MatLoad_BlockMat(Mat newmat,PetscViewer viewer)284 static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
285 {
286 Mat tmpA;
287 PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
288 const PetscInt *cols;
289 const PetscScalar *values;
290 PetscBool flg = PETSC_FALSE, notdone;
291 Mat_SeqAIJ *a;
292 Mat_BlockMat *amat;
293
294 PetscFunctionBegin;
295 /* force binary viewer to load .info file if it has not yet done so */
296 PetscCall(PetscViewerSetUp(viewer));
297 PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
298 PetscCall(MatSetType(tmpA, MATSEQAIJ));
299 PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
300
301 PetscCall(MatGetLocalSize(tmpA, &m, &n));
302 PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
303 PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
304 PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
305 PetscOptionsEnd();
306
307 /* Determine number of nonzero blocks for each block row */
308 a = (Mat_SeqAIJ *)tmpA->data;
309 mbs = m / bs;
310 PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
311 PetscCall(PetscArrayzero(lens, mbs));
312
313 for (i = 0; i < mbs; i++) {
314 for (j = 0; j < bs; j++) {
315 ii[j] = a->j + a->i[i * bs + j];
316 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
317 }
318
319 currentcol = -1;
320 while (PETSC_TRUE) {
321 notdone = PETSC_FALSE;
322 nextcol = 1000000000;
323 for (j = 0; j < bs; j++) {
324 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
325 ii[j]++;
326 ilens[j]--;
327 }
328 if (ilens[j] > 0) {
329 notdone = PETSC_TRUE;
330 nextcol = PetscMin(nextcol, ii[j][0] / bs);
331 }
332 }
333 if (!notdone) break;
334 if (!flg || (nextcol >= i)) lens[i]++;
335 currentcol = nextcol;
336 }
337 }
338
339 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
340 PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
341 if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
342 amat = (Mat_BlockMat *)newmat->data;
343
344 /* preallocate the submatrices */
345 PetscCall(PetscMalloc1(bs, &llens));
346 for (i = 0; i < mbs; i++) { /* loops for block rows */
347 for (j = 0; j < bs; j++) {
348 ii[j] = a->j + a->i[i * bs + j];
349 ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
350 }
351
352 currentcol = 1000000000;
353 for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
354 if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
355 }
356
357 while (PETSC_TRUE) { /* loops over blocks in block row */
358 notdone = PETSC_FALSE;
359 nextcol = 1000000000;
360 PetscCall(PetscArrayzero(llens, bs));
361 for (j = 0; j < bs; j++) { /* loop over rows in block */
362 while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
363 ii[j]++;
364 ilens[j]--;
365 llens[j]++;
366 }
367 if (ilens[j] > 0) {
368 notdone = PETSC_TRUE;
369 nextcol = PetscMin(nextcol, ii[j][0] / bs);
370 }
371 }
372 PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
373 if (!flg || currentcol >= i) {
374 amat->j[cnt] = currentcol;
375 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
376 }
377
378 if (!notdone) break;
379 currentcol = nextcol;
380 }
381 amat->ilen[i] = lens[i];
382 }
383
384 PetscCall(PetscFree3(lens, ii, ilens));
385 PetscCall(PetscFree(llens));
386
387 /* copy over the matrix, one row at a time */
388 for (i = 0; i < m; i++) {
389 PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
390 PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
391 PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
392 }
393 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
394 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
395 PetscFunctionReturn(PETSC_SUCCESS);
396 }
397
MatView_BlockMat(Mat A,PetscViewer viewer)398 static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
399 {
400 Mat_BlockMat *a = (Mat_BlockMat *)A->data;
401 const char *name;
402 PetscViewerFormat format;
403
404 PetscFunctionBegin;
405 PetscCall(PetscObjectGetName((PetscObject)A, &name));
406 PetscCall(PetscViewerGetFormat(viewer, &format));
407 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
408 PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
409 if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
410 }
411 PetscFunctionReturn(PETSC_SUCCESS);
412 }
413
MatDestroy_BlockMat(Mat mat)414 static PetscErrorCode MatDestroy_BlockMat(Mat mat)
415 {
416 Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
417 PetscInt i;
418
419 PetscFunctionBegin;
420 PetscCall(VecDestroy(&bmat->right));
421 PetscCall(VecDestroy(&bmat->left));
422 PetscCall(VecDestroy(&bmat->middle));
423 PetscCall(VecDestroy(&bmat->workb));
424 if (bmat->diags) {
425 for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
426 }
427 if (bmat->a) {
428 for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
429 }
430 PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
431 PetscCall(PetscFree(mat->data));
432 PetscFunctionReturn(PETSC_SUCCESS);
433 }
434
MatMult_BlockMat(Mat A,Vec x,Vec y)435 static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
436 {
437 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
438 PetscScalar *xx, *yy;
439 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
440 Mat *aa;
441
442 PetscFunctionBegin;
443 /*
444 Standard CSR multiply except each entry is a Mat
445 */
446 PetscCall(VecGetArray(x, &xx));
447
448 PetscCall(VecSet(y, 0.0));
449 PetscCall(VecGetArray(y, &yy));
450 aj = bmat->j;
451 aa = bmat->a;
452 ii = bmat->i;
453 for (i = 0; i < m; i++) {
454 jrow = ii[i];
455 PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
456 n = ii[i + 1] - jrow;
457 for (j = 0; j < n; j++) {
458 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
459 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
460 PetscCall(VecResetArray(bmat->right));
461 jrow++;
462 }
463 PetscCall(VecResetArray(bmat->left));
464 }
465 PetscCall(VecRestoreArray(x, &xx));
466 PetscCall(VecRestoreArray(y, &yy));
467 PetscFunctionReturn(PETSC_SUCCESS);
468 }
469
MatMult_BlockMat_Symmetric(Mat A,Vec x,Vec y)470 static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
471 {
472 Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
473 PetscScalar *xx, *yy;
474 PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
475 Mat *aa;
476
477 PetscFunctionBegin;
478 /*
479 Standard CSR multiply except each entry is a Mat
480 */
481 PetscCall(VecGetArray(x, &xx));
482
483 PetscCall(VecSet(y, 0.0));
484 PetscCall(VecGetArray(y, &yy));
485 aj = bmat->j;
486 aa = bmat->a;
487 ii = bmat->i;
488 for (i = 0; i < m; i++) {
489 jrow = ii[i];
490 n = ii[i + 1] - jrow;
491 PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
492 PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
493 /* if we ALWAYS required a diagonal entry then could remove this if test */
494 if (aj[jrow] == i) {
495 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
496 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
497 PetscCall(VecResetArray(bmat->right));
498 jrow++;
499 n--;
500 }
501 for (j = 0; j < n; j++) {
502 PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
503 PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
504 PetscCall(VecResetArray(bmat->right));
505
506 PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
507 PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
508 PetscCall(VecResetArray(bmat->right));
509 jrow++;
510 }
511 PetscCall(VecResetArray(bmat->left));
512 PetscCall(VecResetArray(bmat->middle));
513 }
514 PetscCall(VecRestoreArray(x, &xx));
515 PetscCall(VecRestoreArray(y, &yy));
516 PetscFunctionReturn(PETSC_SUCCESS);
517 }
518
/* WARNING: unimplemented stub -- returns success without computing z = y + A*x */
static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}
524
/* WARNING: unimplemented stub -- returns success without computing y = A^T x */
static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}
530
/* WARNING: unimplemented stub -- returns success without computing z = y + A^T x */
static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}
536
/*
   Extracts the scalar submatrix built from entry (first, first) of every block.
   Restricted to isrow == iscol being a stride IS whose step equals the block size,
   i.e. one scalar entry is selected from each block. The result is a SeqAIJ (or
   SeqSBAIJ for symmetric storage) assembled by writing directly into its CSR arrays.
*/
static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  Mat_SeqAIJ   *c;
  PetscInt      i, k, first, step, lensi, nrows, ncols;
  PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
  PetscScalar  *a_new;
  Mat           C, *aa = a->a;
  PetscBool     stride, equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow, iscol, &equal));
  PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
  PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol, &first, &step));
  PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow, &nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    C = *B;
    PetscCall(MatGetSize(C, &n_rows, &n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
    else PetscCall(MatSetType(C, MATSEQAIJ));
    /* one of the two preallocations applies depending on the type chosen above; the other is ignored */
    PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
  }
  c = (Mat_SeqAIJ *)C->data;

  /* loop over rows inserting into submatrix; fills C's CSR arrays directly,
     the block's column pattern carries over unchanged */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i = 0; i < nrows; i++) {
    lensi = ailen[i];
    for (k = 0; k < lensi; k++) {
      *j_new++ = *aj++;
      /* take entry (first, first) of each block */
      PetscCall(MatGetValue(*aa++, first, first, a_new++));
    }
    i_new[i + 1] = i_new[i] + lensi;
    c->ilen[i]   = lensi;
  }

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
595
/*
   Final assembly: compacts each block row in place by shifting its entries left
   over the unused preallocated slots of earlier rows, rebuilds a->i, then
   assembles every block Mat. MAT_FLUSH_ASSEMBLY is a no-op.
*/
static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
  PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
  Mat          *aa = a->a, *ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i = 1; i < m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it*/
    fshift += imax[i - 1] - ailen[i - 1];
    rmax = PetscMax(rmax, ailen[i]);
    if (fshift) {
      ip = aj + ai[i]; /* block-column indices of row i */
      ap = aa + ai[i]; /* Mat entries of row i */
      N  = ailen[i];
      for (j = 0; j < N; j++) {
        ip[j - fshift] = ip[j];
        ap[j - fshift] = ap[j];
      }
    }
    ai[i] = ai[i - 1] + ailen[i - 1]; /* row starts now reflect the compacted layout */
  }
  if (m) {
    fshift += imax[m - 1] - ailen[m - 1];
    ai[m] = ai[m - 1] + ailen[m - 1];
  }
  /* reset ilen and imax for each row */
  for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
  a->nz = ai[m];
  /* every stored block must exist and be assembled */
  for (i = 0; i < a->nz; i++) {
    PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
    PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
  PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
  PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

  A->info.mallocs += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscFunctionReturn(PETSC_SUCCESS);
}
644
MatSetOption_BlockMat(Mat A,MatOption opt,PetscBool flg)645 static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
646 {
647 PetscFunctionBegin;
648 if (opt == MAT_SYMMETRIC && flg) {
649 A->ops->sor = MatSOR_BlockMat_Symmetric;
650 A->ops->mult = MatMult_BlockMat_Symmetric;
651 } else {
652 PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
653 }
654 PetscFunctionReturn(PETSC_SUCCESS);
655 }
656
/* Function dispatch table for MATBLOCKMAT; slot positions are fixed by struct _MatOps
   (the numbered comments mark every fifth slot) */
static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                                       /* 4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                                       /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL};
801
/*@C
   MatBlockMatSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameter nz
   (or the array nnz). By setting these parameters accurately, performance
   during matrix assembly can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - The matrix
.  bs - size of each block in matrix
.  nz - number of nonzeros per block row (same for all rows)
-  nnz - array containing the number of nonzeros in the various block rows
         (possibly different for each row) or `NULL`

   Level: intermediate

   Notes:
   If `nnz` is given then `nz` is ignored

   Specify the preallocated storage with either `nz` or `nnz` (not both).
   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
   allocation.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
{
  PetscFunctionBegin;
  /* dispatches to MatBlockMatSetPreallocation_BlockMat() when B is a MATBLOCKMAT; a no-op otherwise */
  PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
834
/*
   Type-specific preallocation: sets the block size on the layouts, creates the
   bs-sized work vectors, and allocates the CSR arrays (a, j, i) sized from nnz.
   Currently an explicit per-block-row nnz array is required.
*/
static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs)); /* bs may have been adjusted by the layout */

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
  if (nnz) {
    for (i = 0; i < A->rmap->n / bs; i++) {
      PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
    }
  }
  bmat->mbs = A->rmap->n / bs; /* number of block rows */

  /* right/middle have no backing array: they alias raw segments via VecPlaceArray() */
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

  /* note: sized A->rmap->n (scalar rows) although only mbs entries are used */
  if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
  PetscCheck(PetscLikely(nnz), PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
  nz = 0;
  for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
    bmat->imax[i] = nnz[i];
    nz += nnz[i]; /* total number of blocks */
  }

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscScalar), (void **)&bmat->a));
  PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&bmat->j));
  PetscCall(PetscShmgetAllocateArray(A->rmap->n + 1, sizeof(PetscInt), (void **)&bmat->i));
  bmat->free_a  = PETSC_TRUE;
  bmat->free_ij = PETSC_TRUE;

  /* row starts from the per-row maxima */
  bmat->i[0] = 0;
  for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
889
890 /*MC
891 MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
892 consisting of (usually) sparse blocks.
893
894 Level: advanced
895
896 .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
897 M*/
898
MatCreate_BlockMat(Mat A)899 PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
900 {
901 Mat_BlockMat *b;
902
903 PetscFunctionBegin;
904 PetscCall(PetscNew(&b));
905 A->data = (void *)b;
906 A->ops[0] = MatOps_Values;
907 A->assembled = PETSC_TRUE;
908 A->preallocated = PETSC_FALSE;
909 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
910
911 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
912 PetscFunctionReturn(PETSC_SUCCESS);
913 }
914
/*@C
   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of rows
.  n - number of columns
.  bs - size of each submatrix
.  nz - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
-  nnz - expected number of nonzeros per block row if known (use `NULL` otherwise)

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must
   have the same size and be sequential. The local and global sizes must be compatible with this decomposition.

   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.

   Developer Notes:
   I don't like the name, it is not `MATNESTMAT`

.seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  /* convenience wrapper: create, size, type, preallocate */
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
953