1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5
MatDestroy_MPISBAIJ(Mat mat)6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9
10 PetscFunctionBegin;
11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12 PetscCall(MatStashDestroy_Private(&mat->stash));
13 PetscCall(MatStashDestroy_Private(&mat->bstash));
14 PetscCall(MatDestroy(&baij->A));
15 PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17 PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19 PetscCall(PetscFree(baij->colmap));
20 #endif
21 PetscCall(PetscFree(baij->garray));
22 PetscCall(VecDestroy(&baij->lvec));
23 PetscCall(VecScatterDestroy(&baij->Mvctx));
24 PetscCall(VecDestroy(&baij->slvec0));
25 PetscCall(VecDestroy(&baij->slvec0b));
26 PetscCall(VecDestroy(&baij->slvec1));
27 PetscCall(VecDestroy(&baij->slvec1a));
28 PetscCall(VecDestroy(&baij->slvec1b));
29 PetscCall(VecScatterDestroy(&baij->sMvctx));
30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31 PetscCall(PetscFree(baij->barray));
32 PetscCall(PetscFree(baij->hd));
33 PetscCall(VecDestroy(&baij->diag));
34 PetscCall(VecDestroy(&baij->bb1));
35 PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37 PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39 PetscCall(PetscFree(baij->in_loc));
40 PetscCall(PetscFree(baij->v_loc));
41 PetscCall(PetscFree(baij->rangebs));
42 PetscCall(PetscFree(mat->data));
43
44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57 PetscFunctionReturn(PETSC_SUCCESS);
58 }
59
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73
74 /* This could be moved to matimpl.h */
MatPreallocateWithMats_Private(Mat B,PetscInt nm,Mat X[],PetscBool symm[],PetscBool fill)75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77 Mat preallocator;
78 PetscInt r, rstart, rend;
79 PetscInt bs, i, m, n, M, N;
80 PetscBool cong = PETSC_TRUE;
81
82 PetscFunctionBegin;
83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84 PetscValidLogicalCollectiveInt(B, nm, 2);
85 for (i = 0; i < nm; i++) {
86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89 }
90 PetscValidLogicalCollectiveBool(B, fill, 5);
91 PetscCall(MatGetBlockSize(B, &bs));
92 PetscCall(MatGetSize(B, &M, &N));
93 PetscCall(MatGetLocalSize(B, &m, &n));
94 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96 PetscCall(MatSetBlockSize(preallocator, bs));
97 PetscCall(MatSetSizes(preallocator, m, n, M, N));
98 PetscCall(MatSetUp(preallocator));
99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100 for (r = rstart; r < rend; ++r) {
101 PetscInt ncols;
102 const PetscInt *row;
103 const PetscScalar *vals;
104
105 for (i = 0; i < nm; i++) {
106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110 }
111 }
112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115 PetscCall(MatDestroy(&preallocator));
116 PetscFunctionReturn(PETSC_SUCCESS);
117 }
118
/* Generic fallback conversion of an MPISBAIJ matrix A to another type:
   copies the entries row by row, expanding the symmetric (upper-triangular)
   storage into a fully stored matrix.  Supports MAT_INITIAL_MATRIX,
   MAT_REUSE_MATRIX (reuses *newmat), and MAT_INPLACE_MATRIX (replaces A). */
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      /* sparse target: derive its preallocation from A's symmetrized pattern */
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      /* dense target needs no preallocation */
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  /* copy the values; each stored upper-triangular entry (r,c) is mirrored to (c,r) */
  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      /* Hermitian case: the mirrored entries must be conjugated */
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's innards with B's, destroying the old A */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
180
MatStoreValues_MPISBAIJ(Mat mat)181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184
185 PetscFunctionBegin;
186 PetscCall(MatStoreValues(aij->A));
187 PetscCall(MatStoreValues(aij->B));
188 PetscFunctionReturn(PETSC_SUCCESS);
189 }
190
MatRetrieveValues_MPISBAIJ(Mat mat)191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194
195 PetscFunctionBegin;
196 PetscCall(MatRetrieveValues(aij->A));
197 PetscCall(MatRetrieveValues(aij->B));
198 PetscFunctionReturn(PETSC_SUCCESS);
199 }
200
/* Insert or add one scalar value into the local diagonal block A (SeqSBAIJ).
   Relies on the enclosing scope for: bs, bs2, a, A, aa, ai, aj, aimax, ailen,
   and scratch variables rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i,
   nrow, rmax, N.  Jumps to a_noinsert when the entry already exists or when
   new nonzeros are silently dropped (a->nonew == 1). */
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; /* block row */ \
    rp   = aj + ai[brow]; /* block-column indices of this block row */ \
    ap   = aa + bs2 * ai[brow]; /* values of this block row */ \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; /* block column */ \
    ridx = row % bs; /* row offset within the block */ \
    cidx = col % bs; /* column offset within the block */ \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search narrows the range, then linear scan */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already allocated: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* new nonzeros silently ignored */ \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
240
/* Insert or add one scalar value into the local off-diagonal block B (SeqBAIJ).
   Identical in structure to MatSetValues_SeqSBAIJ_A_Private but works on the
   b/bi/bj/ba/bimax/bilen variables from the enclosing scope; uses label
   b_noinsert for the "entry handled" exit. */
#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; /* block row */ \
    rp   = bj + bi[brow]; /* block-column indices of this block row */ \
    ap   = ba + bs2 * bi[brow]; /* values of this block row */ \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; /* block column */ \
    ridx = row % bs; /* row offset within the block */ \
    cidx = col % bs; /* column offset within the block */ \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search narrows the range, then linear scan */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already allocated: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* new nonzeros silently ignored */ \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
280
281 /* Only add/insert a(i,j) with i<=j (blocks).
282 Any a(i,j) with i>j input by user is ignored or generates an error
283 */
/* Insert or add scalar-indexed values into an MPISBAIJ matrix.  Only entries
   whose block row <= block column are stored; lower-triangular input is
   skipped (or errors unless ignore_ltriangular is set).  Locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) block through the
   *_Private macros above; off-process rows are stashed for assembly-time
   communication. */
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch arrays used to filter lower-triangular columns before stashing */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue; /* negative columns are silently skipped */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* assembled matrix: map the global block column through colmap to
               B's compacted local column numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              /* block column not present: disassemble B back to global column
                 indices so the new entry can be inserted */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        /* keep only the upper-triangular columns, then stash the filtered row */
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
399
/* Insert or add one bs-by-bs block of values at block position (row,col) of a
   SeqSBAIJ matrix A.  Lower-triangular blocks (col < row) are rejected or
   ignored according to ignore_ltriangular.  v holds the block; with
   roworiented storage the values are transposed into the column-major layout
   used internally.  (orow,ocol) are the caller's global block indices, used
   only in error messages. */
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
    PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
  }
  rp    = aj + ai[row];       /* block-column indices of block row */
  ap    = aa + bs2 * ai[row]; /* block values of block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search narrows the range, then linear scan finds col */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already allocated: update it in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* row-oriented input: transpose while copying (stride bs) */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* column-oriented input matches internal layout: straight copy */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new nonzero blocks silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
479
480 /*
481 This routine is exactly duplicated in mpibaij.c
482 */
/* Insert or add one bs-by-bs block of values at block position (row,col) of a
   SeqBAIJ matrix A (used here for the off-diagonal block B).  Unlike the
   SBAIJ variant above there is no triangularity restriction.  (orow,ocol) are
   the caller's global block indices, used only in error messages. */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of block row */
  ap    = aa + bs2 * ai[row]; /* block values of block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  /* binary search narrows the range, then linear scan finds col */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already allocated: update it in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* row-oriented input: transpose while copying (stride bs) */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* column-oriented input matches internal layout: straight copy */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new nonzero blocks silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
559
560 /*
561 This routine could be optimized by removing the need for the block copy below and passing stride information
562 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
563 */
/* Insert or add block-indexed values into an MPISBAIJ matrix.  im[]/in[] are
   global BLOCK indices and v holds m*n blocks of size bs*bs.  Lower-triangular
   blocks are skipped (or error unless ignore_ltriangular).  Each block is
   copied into the scratch buffer barray (unless a copy can be avoided) and
   then handed to the appropriate Seq(S)BAIJ inlined routine; off-process rows
   are stashed in the block stash. */
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij   = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        /* If NumCol = 1 then a copy is not required */
        if (roworiented && n == 1) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the strided block into contiguous barray */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* diagonal block (A) */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are silently skipped */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* map the global block column through colmap to B's compacted numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            /* NOTE(review): A's data is Mat_SeqSBAIJ yet is read here through a
               Mat_SeqBAIJ cast; this relies on nonew sharing a common layout
               offset in both structs — confirm against the struct definitions */
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              /* block column not present: disassemble and use global indices */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash the whole row of blocks */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
649
/* Retrieve scalar-indexed values from an MPISBAIJ matrix into v (row-major,
   m x n).  Rows must be locally owned; columns in the diagonal range are read
   from A, others are looked up through colmap/garray in B, with entries not
   present in B returned as 0.0. */
static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        /* diagonal block: read directly from A with local indices */
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal: map the global block column through colmap */
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        /* entry not stored in B: the value is zero by convention */
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs; /* local scalar column inside B */
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
686
/* Compute a norm of an MPISBAIJ matrix.  Frobenius combines the local squared
   norms of A and B (B is counted twice since its transpose is not stored).
   NORM_1/NORM_INFINITY coincide for a symmetric matrix and are computed as
   the maximum column sum, accumulating each stored entry into both its row
   and column sums.  Other norms are unsupported. */
static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: everything lives in A */
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* square power of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* square power of norm(B) */
      PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* B's contribution is doubled: its mirrored (transposed) part is not stored */
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      /* rsum[c] accumulates the absolute column sum for every global column */
      PetscCall(PetscCalloc1(mat->cmap->N, &rsum));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow); /* first global row of this block row */
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj); /* first global column of this block */
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj]; /* garray maps B's compact columns to global block columns */
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              /* count the entry toward both its column and (mirrored) row sums */
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* norm is the maximum global column sum */
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum[col] > *norm) *norm = rsum[col];
      }
      PetscCall(PetscFree(rsum));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
767
MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)768 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
769 {
770 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
771 PetscInt nstash, reallocs;
772
773 PetscFunctionBegin;
774 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
775
776 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
777 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
778 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
779 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
780 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
781 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
782 PetscFunctionReturn(PETSC_SUCCESS);
783 }
784
/*
  Completes assembly of a parallel SBAIJ matrix: drains the point stash and
  then the block stash of entries received from other ranks, assembles the
  local diagonal (A) and off-diagonal (B) blocks, agrees across ranks on
  disassembly, and on the first final assembly builds the scatter contexts.
*/
static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     all_assembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain messages of stashed point entries until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* block entries: each column of the message carries bs2 scalars */
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the row/column orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  /* release MatGetRow() workspace; it is re-allocated on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
879
880 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
881 #include <petscdraw.h>
/*
  Views a parallel SBAIJ matrix on ASCII, draw, or socket viewers.
  For INFO-style ASCII formats, per-rank statistics are printed and the
  routine returns early; otherwise the entire matrix is gathered onto
  rank 0 and viewed as a sequential matrix there.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-rank nonzero/memory statistics only; no matrix entries */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                 mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_FACTOR_INFO) PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    /* rank 0 owns all rows of the gathered matrix; other ranks own none */
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    /* rvals holds the bs consecutive global row indices of one block row */
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block at a time */
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local block columns to global ones */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    /* matname is only read below under the same has-name condition that sets it */
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
997
998 /* Used for both MPIBAIJ and MPISBAIJ matrices */
999 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1000
MatView_MPISBAIJ(Mat mat,PetscViewer viewer)1001 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1002 {
1003 PetscBool isascii, isdraw, issocket, isbinary;
1004
1005 PetscFunctionBegin;
1006 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1007 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1008 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1009 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1010 if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1011 else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1012 PetscFunctionReturn(PETSC_SUCCESS);
1013 }
1014
1015 #if defined(PETSC_USE_COMPLEX)
/*
  yy = A*xx for a Hermitian matrix in parallel symmetric-block storage.
  Only one triangle is stored, so the missing triangle's contribution is
  recovered by applying B^H and accumulating through the scatter sMvctx.
*/
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part: requires B to support Hermitian-transpose multiply */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  /* ADD_VALUES accumulates contributions arriving from multiple ranks */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* supperdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1048 #endif
1049
/*
  yy = A*xx for a symmetric matrix in parallel symmetric-block storage.
  Only one triangle is stored, so the missing triangle's contribution is
  recovered by applying B^T and accumulating through the scatter sMvctx.
*/
static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  /* ADD_VALUES accumulates contributions arriving from multiple ranks */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* supperdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1081
1082 #if PetscDefined(USE_COMPLEX)
/*
  zz = yy + A*xx for a Hermitian matrix in parallel symmetric-block storage;
  same structure as MatMult_MPISBAIJ_Hermitian but folding yy in via multadd.
*/
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a = A_diag*xx + yy */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  /* slvec1b shares memory with slvec1; bump its state before zeroing */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part: requires B to support Hermitian-transpose multiply */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  /* start communication, release xx, then finish communication */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* supperdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1114 #endif
1115
/*
  zz = yy + A*xx for a symmetric matrix in parallel symmetric-block storage;
  same structure as MatMult_MPISBAIJ but folding yy in via multadd.
*/
static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a = A_diag*xx + yy */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  /* slvec1b shares memory with slvec1; bump its state before zeroing */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  /* start communication, release xx, then finish communication */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* supperdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1146
1147 /*
1148 This only works correctly for square matrices where the subblock A->A is the
1149 diagonal block
1150 */
MatGetDiagonal_MPISBAIJ(Mat A,Vec v)1151 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1152 {
1153 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1154
1155 PetscFunctionBegin;
1156 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1157 PetscCall(MatGetDiagonal(a->A, v));
1158 PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160
MatScale_MPISBAIJ(Mat A,PetscScalar aa)1161 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1162 {
1163 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1164
1165 PetscFunctionBegin;
1166 PetscCall(MatScale(a->A, aa));
1167 PetscCall(MatScale(a->B, aa));
1168 PetscFunctionReturn(PETSC_SUCCESS);
1169 }
1170
/*
  Returns one locally-owned global row of the matrix: column indices and/or
  values merged from the diagonal (A) and off-diagonal (B) blocks, sorted by
  increasing global column. Workspace is allocated lazily on first use and
  reused until MatAssemblyEnd frees it; only one row may be active at a time.
*/
static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  /* NOTE(review): cstart is taken from rstartbs (the block-row start) and used
     as the block-column start of the diagonal block — assumes a square matrix
     with matching row/column layout */
  PetscInt *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  /* pass NULL for the outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark splits B's entries into those left of the diagonal block
         (global column < cstart) and those right of it */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* values were not requested, so imark must be computed here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1255
MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1256 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1257 {
1258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1259
1260 PetscFunctionBegin;
1261 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1262 baij->getrowactive = PETSC_FALSE;
1263 PetscFunctionReturn(PETSC_SUCCESS);
1264 }
1265
MatGetRowUpperTriangular_MPISBAIJ(Mat A)1266 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1267 {
1268 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1269 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1270
1271 PetscFunctionBegin;
1272 aA->getrow_utriangular = PETSC_TRUE;
1273 PetscFunctionReturn(PETSC_SUCCESS);
1274 }
MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)1275 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1276 {
1277 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1278 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1279
1280 PetscFunctionBegin;
1281 aA->getrow_utriangular = PETSC_FALSE;
1282 PetscFunctionReturn(PETSC_SUCCESS);
1283 }
1284
MatConjugate_MPISBAIJ(Mat mat)1285 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1286 {
1287 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1288
1289 PetscFunctionBegin;
1290 PetscCall(MatConjugate(a->A));
1291 PetscCall(MatConjugate(a->B));
1292 PetscFunctionReturn(PETSC_SUCCESS);
1293 }
1294
MatRealPart_MPISBAIJ(Mat A)1295 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1296 {
1297 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1298
1299 PetscFunctionBegin;
1300 PetscCall(MatRealPart(a->A));
1301 PetscCall(MatRealPart(a->B));
1302 PetscFunctionReturn(PETSC_SUCCESS);
1303 }
1304
MatImaginaryPart_MPISBAIJ(Mat A)1305 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1306 {
1307 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1308
1309 PetscFunctionBegin;
1310 PetscCall(MatImaginaryPart(a->A));
1311 PetscCall(MatImaginaryPart(a->B));
1312 PetscFunctionReturn(PETSC_SUCCESS);
1313 }
1314
1315 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1316 Input: isrow - distributed(parallel),
1317 iscol_local - locally owned (seq)
1318 */
ISEqual_private(IS isrow,IS iscol_local,PetscBool * flg)1319 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1320 {
1321 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch;
1322 const PetscInt *ptr1, *ptr2;
1323
1324 PetscFunctionBegin;
1325 *flg = PETSC_FALSE;
1326 PetscCall(ISGetLocalSize(isrow, &sz1));
1327 PetscCall(ISGetLocalSize(iscol_local, &sz2));
1328 if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1329
1330 PetscCall(ISGetIndices(isrow, &ptr1));
1331 PetscCall(ISGetIndices(iscol_local, &ptr2));
1332
1333 PetscCall(PetscMalloc1(sz1, &a1));
1334 PetscCall(PetscMalloc1(sz2, &a2));
1335 PetscCall(PetscArraycpy(a1, ptr1, sz1));
1336 PetscCall(PetscArraycpy(a2, ptr2, sz2));
1337 PetscCall(PetscSortInt(sz1, a1));
1338 PetscCall(PetscSortInt(sz2, a2));
1339
1340 nmatch = 0;
1341 k = 0;
1342 for (i = 0; i < sz1; i++) {
1343 for (j = k; j < sz2; j++) {
1344 if (a1[i] == a2[j]) {
1345 k = j;
1346 nmatch++;
1347 break;
1348 }
1349 }
1350 }
1351 PetscCall(ISRestoreIndices(isrow, &ptr1));
1352 PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1353 PetscCall(PetscFree(a1));
1354 PetscCall(PetscFree(a2));
1355 if (nmatch < sz1) {
1356 *flg = PETSC_FALSE;
1357 } else {
1358 *flg = PETSC_TRUE;
1359 }
1360 PetscFunctionReturn(PETSC_SUCCESS);
1361 }
1362
/*
  Extracts a submatrix of a parallel SBAIJ matrix. Three cases:
  1) isrow is a subset of the gathered iscol (result stays symmetric);
  2) iscol spans all columns (isidentity);
  3) general off-diagonal request: the result is assembled from two
     rectangular pieces, C[0] and the transpose of C[1], which requires
     isrow and iscol to have an empty intersection.
  On MAT_INITIAL_MATRIX the gathered index sets are composed onto the new
  matrix so MAT_REUSE_MATRIX can recover them.
*/
static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  Mat       C[2];
  IS        iscol_local, isrow_local;
  PetscInt  csize, csize_local, rsize;
  PetscBool isequal, issorted, isidentity = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(ISGetLocalSize(isrow, &rsize));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the gathered column IS stashed on the previous result */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
  }
  PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
  if (!isequal) {
    PetscCall(ISGetLocalSize(iscol_local, &csize_local));
    /* iscol covering every column also keeps the extraction symmetric */
    isidentity = (PetscBool)(mat->cmap->N == csize_local);
    if (!isidentity) {
      if (call == MAT_REUSE_MATRIX) {
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
        PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
      } else {
        PetscCall(ISAllGather(isrow, &isrow_local));
        PetscCall(ISSorted(isrow_local, &issorted));
        PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
      }
    }
  }
  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
  if (!isequal && !isidentity) {
    /* off-diagonal case: combine C[0] with the transposed complementary piece */
    if (call == MAT_INITIAL_MATRIX) {
      IS       intersect;
      PetscInt ni;

      PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
      PetscCall(ISGetLocalSize(intersect, &ni));
      PetscCall(ISDestroy(&intersect));
      PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
    }
    PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
    PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
    PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
    if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
    else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    PetscCall(MatDestroy(C));
    PetscCall(MatDestroy(C + 1));
  }
  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered index sets on the result for later reuse */
    if (!isequal && !isidentity) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
      PetscCall(ISDestroy(&isrow_local));
    }
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1427
MatZeroEntries_MPISBAIJ(Mat A)1428 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1429 {
1430 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1431
1432 PetscFunctionBegin;
1433 PetscCall(MatZeroEntries(l->A));
1434 PetscCall(MatZeroEntries(l->B));
1435 PetscFunctionReturn(PETSC_SUCCESS);
1436 }
1437
MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo * info)1438 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1439 {
1440 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data;
1441 Mat A = a->A, B = a->B;
1442 PetscLogDouble isend[5], irecv[5];
1443
1444 PetscFunctionBegin;
1445 info->block_size = (PetscReal)matin->rmap->bs;
1446
1447 PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1448
1449 isend[0] = info->nz_used;
1450 isend[1] = info->nz_allocated;
1451 isend[2] = info->nz_unneeded;
1452 isend[3] = info->memory;
1453 isend[4] = info->mallocs;
1454
1455 PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1456
1457 isend[0] += info->nz_used;
1458 isend[1] += info->nz_allocated;
1459 isend[2] += info->nz_unneeded;
1460 isend[3] += info->memory;
1461 isend[4] += info->mallocs;
1462 if (flag == MAT_LOCAL) {
1463 info->nz_used = isend[0];
1464 info->nz_allocated = isend[1];
1465 info->nz_unneeded = isend[2];
1466 info->memory = isend[3];
1467 info->mallocs = isend[4];
1468 } else if (flag == MAT_GLOBAL_MAX) {
1469 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1470
1471 info->nz_used = irecv[0];
1472 info->nz_allocated = irecv[1];
1473 info->nz_unneeded = irecv[2];
1474 info->memory = irecv[3];
1475 info->mallocs = irecv[4];
1476 } else if (flag == MAT_GLOBAL_SUM) {
1477 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1478
1479 info->nz_used = irecv[0];
1480 info->nz_allocated = irecv[1];
1481 info->nz_unneeded = irecv[2];
1482 info->memory = irecv[3];
1483 info->mallocs = irecv[4];
1484 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1485 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1486 info->fill_ratio_needed = 0;
1487 info->factor_mallocs = 0;
1488 PetscFunctionReturn(PETSC_SUCCESS);
1489 }
1490
/*
  Sets an option on a parallel SBAIJ matrix. Most options are forwarded to
  the local blocks; some toggle flags in the parallel data structure, and
  the Hermitian/symmetric options additionally swap the mat-vec kernels in
  complex builds. Unknown options fall through silently.
*/
static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern options: forwarded to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded in the parallel structure and forwarded to both blocks */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    /* the diagonal-block forward requires a square preallocated block */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* flags live on the sequential diagonal block */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1558
MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat * B)1559 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1560 {
1561 PetscFunctionBegin;
1562 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1563 if (reuse == MAT_INITIAL_MATRIX) {
1564 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1565 } else if (reuse == MAT_REUSE_MATRIX) {
1566 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1567 }
1568 PetscFunctionReturn(PETSC_SUCCESS);
1569 }
1570
/*
  Scales the matrix as diag(ll)*A*diag(rr). For the symmetric format the
  matrix must be locally square and ll/rr must conform; a NULL ll is a
  no-op. The scatter of rr is deliberately started before the local
  scaling and finished after it, overlapping communication with work.
*/
static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B;
  PetscInt      nv, m, n;

  PetscFunctionBegin;
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  /* start moving rr's ghost values while the local scaling proceeds */
  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1599
MatSetUnfactored_MPISBAIJ(Mat A)1600 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1601 {
1602 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1603
1604 PetscFunctionBegin;
1605 PetscCall(MatSetUnfactored(a->A));
1606 PetscFunctionReturn(PETSC_SUCCESS);
1607 }
1608
1609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1610
MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool * flag)1611 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1612 {
1613 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1614 Mat a, b, c, d;
1615 PetscBool flg;
1616
1617 PetscFunctionBegin;
1618 a = matA->A;
1619 b = matA->B;
1620 c = matB->A;
1621 d = matB->B;
1622
1623 PetscCall(MatEqual(a, c, &flg));
1624 if (flg) PetscCall(MatEqual(b, d, &flg));
1625 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1626 PetscFunctionReturn(PETSC_SUCCESS);
1627 }
1628
MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)1629 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1630 {
1631 PetscBool isbaij;
1632
1633 PetscFunctionBegin;
1634 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1635 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1636 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1637 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1638 PetscCall(MatGetRowUpperTriangular(A));
1639 PetscCall(MatCopy_Basic(A, B, str));
1640 PetscCall(MatRestoreRowUpperTriangular(A));
1641 } else {
1642 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1643 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1644
1645 PetscCall(MatCopy(a->A, b->A, str));
1646 PetscCall(MatCopy(a->B, b->B, str));
1647 }
1648 PetscCall(PetscObjectStateIncrease((PetscObject)B));
1649 PetscFunctionReturn(PETSC_SUCCESS);
1650 }
1651
/*
  Y = a*X + Y for MPISBAIJ matrices.

  Three cases by pattern relationship:
  - SAME_NONZERO_PATTERN: the value arrays line up, so add them directly with
    BLAS axpy (diagonal SBAIJ part, then off-diagonal BAIJ part).
  - SUBSET_NONZERO_PATTERN: use the generic basic implementation, temporarily
    allowing upper-triangular row access on X.
  - otherwise: build a new matrix B preallocated for the union of both
    patterns, compute the sum into it, and replace Y's data with B.
*/
static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal (SBAIJ) parts: ya->a += alpha * xa->a over all stored values */
    xa = (Mat_SeqSBAIJ *)xx->A->data;
    ya = (Mat_SeqSBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    /* off-diagonal (BAIJ) parts */
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    Mat B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    /* NOTE(review): sized by point rows of the local blocks although the
       preallocation routines fill per-block-row counts; over-allocation is
       presumably harmless -- confirm */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    /* preallocate B for the union of the X and Y nonzero patterns */
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* B takes over Y's header; Y now holds the result */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1700
MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * B[])1701 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1702 {
1703 PetscInt i;
1704 PetscBool flg;
1705
1706 PetscFunctionBegin;
1707 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1708 for (i = 0; i < n; i++) {
1709 PetscCall(ISEqual(irow[i], icol[i], &flg));
1710 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1711 }
1712 PetscFunctionReturn(PETSC_SUCCESS);
1713 }
1714
/*
  Y = Y + a*I. Ensures there is room for the diagonal blocks before delegating
  to the generic shift implementation.
*/
static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
  Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one block per block row (the diagonal block) */
    PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* preallocated but the diagonal part is empty: re-preallocate it with room
       for the diagonal, preserving the nonew option across the call */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1731
MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat * a)1732 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1733 {
1734 PetscFunctionBegin;
1735 *a = ((Mat_MPISBAIJ *)A->data)->A;
1736 PetscFunctionReturn(PETSC_SUCCESS);
1737 }
1738
MatEliminateZeros_MPISBAIJ(Mat A,PetscBool keep)1739 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1740 {
1741 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1742
1743 PetscFunctionBegin;
1744 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
1745 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1746 PetscFunctionReturn(PETSC_SUCCESS);
1747 }
1748
1749 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1750 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1751 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1752
/*
  Virtual function table for MATMPISBAIJ. Entries are positional: the index of
  each slot (noted in the interspersed comments) corresponds to a MatOperation.
  NULL means the operation is not implemented for this matrix type. Note that
  the transpose multiply slots reuse the non-transpose routines, which is valid
  because the matrix is symmetric.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /* 4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPISBAIJ,
                                       /* 69*/ NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       MatEliminateZeros_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_MPI_Hash,
                                       NULL,
                                       NULL};
1897
MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt * d_nnz,PetscInt o_nz,const PetscInt * o_nnz)1898 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1899 {
1900 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1901 PetscInt i, mbs, Mbs;
1902 PetscMPIInt size;
1903
1904 PetscFunctionBegin;
1905 if (B->hash_active) {
1906 B->ops[0] = b->cops;
1907 B->hash_active = PETSC_FALSE;
1908 }
1909 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1910 PetscCall(MatSetBlockSize(B, bs));
1911 PetscCall(PetscLayoutSetUp(B->rmap));
1912 PetscCall(PetscLayoutSetUp(B->cmap));
1913 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1914 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1915 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1916
1917 mbs = B->rmap->n / bs;
1918 Mbs = B->rmap->N / bs;
1919 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1920
1921 B->rmap->bs = bs;
1922 b->bs2 = bs * bs;
1923 b->mbs = mbs;
1924 b->Mbs = Mbs;
1925 b->nbs = B->cmap->n / bs;
1926 b->Nbs = B->cmap->N / bs;
1927
1928 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1929 b->rstartbs = B->rmap->rstart / bs;
1930 b->rendbs = B->rmap->rend / bs;
1931
1932 b->cstartbs = B->cmap->rstart / bs;
1933 b->cendbs = B->cmap->rend / bs;
1934
1935 #if defined(PETSC_USE_CTABLE)
1936 PetscCall(PetscHMapIDestroy(&b->colmap));
1937 #else
1938 PetscCall(PetscFree(b->colmap));
1939 #endif
1940 PetscCall(PetscFree(b->garray));
1941 PetscCall(VecDestroy(&b->lvec));
1942 PetscCall(VecScatterDestroy(&b->Mvctx));
1943 PetscCall(VecDestroy(&b->slvec0));
1944 PetscCall(VecDestroy(&b->slvec0b));
1945 PetscCall(VecDestroy(&b->slvec1));
1946 PetscCall(VecDestroy(&b->slvec1a));
1947 PetscCall(VecDestroy(&b->slvec1b));
1948 PetscCall(VecScatterDestroy(&b->sMvctx));
1949
1950 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1951
1952 MatSeqXAIJGetOptions_Private(b->B);
1953 PetscCall(MatDestroy(&b->B));
1954 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1955 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1956 PetscCall(MatSetType(b->B, MATSEQBAIJ));
1957 MatSeqXAIJRestoreOptions_Private(b->B);
1958
1959 MatSeqXAIJGetOptions_Private(b->A);
1960 PetscCall(MatDestroy(&b->A));
1961 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1962 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1963 PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1964 MatSeqXAIJRestoreOptions_Private(b->A);
1965
1966 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1967 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1968
1969 B->preallocated = PETSC_TRUE;
1970 B->was_assembled = PETSC_FALSE;
1971 B->assembled = PETSC_FALSE;
1972 PetscFunctionReturn(PETSC_SUCCESS);
1973 }
1974
/*
  Preallocate (and optionally fill) the matrix from local block-CSR input:
  ii[0..m] are block-row offsets, jj the global block-column indices, and V
  (optional) the bs*bs values per block. First pass counts, per block row, how
  many blocks fall below the diagonal (ignored by SBAIJ), in the diagonal
  portion, and in the off-diagonal portion; second pass inserts the values.
*/
static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt m, rstart, cend;
  PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar *values = NULL;
  PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
  PetscBool nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* work in block units: local block rows and this process's block-column range */
  m = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cend = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i]; /* number of block columns in this block row */
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
    JJ = jj + ii[i];
    bd = 0; /* blocks strictly below the diagonal (not stored by SBAIJ) */
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0; /* blocks in the diagonal portion (at or above the diagonal, before cend) */
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz = nz - bd;
    nz_max = PetscMax(nz_max, nz); /* longest retained row, sizes the zero buffer below */
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* without user values, insert zeros so the nonzero pattern is still created */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt row = i + rstart;
    PetscInt ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all input was local, so skip the off-process communication during assembly */
  nooffprocentries = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2050
2051 /*MC
2052 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2053 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2054 the matrix is stored.
2055
2056 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2057 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2058
2059 Options Database Key:
2060 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2061
2062 Level: beginner
2063
2064 Note:
   The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
   diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2067
2068 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2069 M*/
2070
/*
  Constructor for MATMPISBAIJ: allocates the type-specific data structure,
  installs the operations table, registers the composed methods and
  conversions, marks the matrix symmetric, and processes the hash-table
  assembly option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  B->ops[0] = MatOps_Values;

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view = MatView_MPISBAIJ;
  B->assembled = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash = PETSC_FALSE;
  b->colmap = NULL;
  b->garray = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec = NULL;
  b->Mvctx = NULL;
  b->slvec0 = NULL;
  b->slvec0b = NULL;
  b->slvec1 = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices = NULL;
  b->rowvalues = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht = NULL;
  b->hd = NULL;
  b->ht_size = 0;
  b->ht_flag = PETSC_FALSE;
  b->ht_fact = 0;
  b->ht_total_ct = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc = NULL;
  b->n_loc = 0;

  /* register composed methods looked up by name elsewhere */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* the type is symmetric by construction; only Hermitian for real scalars */
  B->symmetric = PETSC_BOOL3_TRUE;
  B->structurally_symmetric = PETSC_BOOL3_TRUE;
  B->symmetry_eternal = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if !defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* fall back to the default factor for out-of-range input */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2169
2170 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2171 /*MC
2172 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2173
2174 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2175 and `MATMPISBAIJ` otherwise.
2176
2177 Options Database Key:
2178 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2179
2180 Level: beginner
2181
2182 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2183 M*/
2184
2185 /*@
2186 MatMPISBAIJSetPreallocation - For good matrix assembly performance
2187 the user should preallocate the matrix storage by setting the parameters
2188 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2189 performance can be increased by more than a factor of 50.
2190
2191 Collective
2192
2193 Input Parameters:
2194 + B - the matrix
2195 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2196 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2197 . d_nz - number of block nonzeros per block row in diagonal portion of local
2198 submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the local submatrix
          (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room
          for the diagonal entry and set a value even if it is zero.
2203 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2204 submatrix (same for all local rows).
2205 - o_nnz - array containing the number of nonzeros in the various block rows of the
2206 off-diagonal portion of the local submatrix that is right of the diagonal
2207 (possibly different for each block row) or `NULL`.
2208
2209 Options Database Keys:
2210 + -mat_no_unroll - uses code that does not unroll the loops in the
2211 block calculations (much slower)
2212 - -mat_block_size - size of the blocks to use
2213
2214 Level: intermediate
2215
2216 Notes:
2217
2218 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2219 than it must be used on all processors that share the object for that argument.
2220
2221 If the *_nnz parameter is given then the *_nz parameter is ignored
2222
2223 Storage Information:
2224 For a square global matrix we define each processor's diagonal portion
2225 to be its local rows and the corresponding columns (a square submatrix);
2226 each processor's off-diagonal portion encompasses the remainder of the
2227 local matrix (a rectangular submatrix).
2228
2229 The user can specify preallocated storage for the diagonal part of
2230 the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2231 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2232 memory allocation. Likewise, specify preallocated storage for the
2233 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2234
2235 You can call `MatGetInfo()` to get information on how effective the preallocation was;
2236 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2237 You can also run with the option `-info` and look for messages with the string
2238 malloc in them to see if additional memory allocation was needed.
2239
2240 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2241 the figure below we depict these three local rows and all columns (0-11).
2242
2243 .vb
2244 0 1 2 3 4 5 6 7 8 9 10 11
2245 --------------------------
2246 row 3 |. . . d d d o o o o o o
2247 row 4 |. . . d d d o o o o o o
2248 row 5 |. . . d d d o o o o o o
2249 --------------------------
2250 .ve
2251
2252 Thus, any entries in the d locations are stored in the d (diagonal)
2253 submatrix, and any entries in the o locations are stored in the
2254 o (off-diagonal) submatrix. Note that the d matrix is stored in
2255 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2256
2257 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2258 plus the diagonal part of the d matrix,
2259 and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2260
2261 In general, for PDE problems in which most nonzeros are near the diagonal,
2262 one expects `d_nz` >> `o_nz`.
2263
2264 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2265 @*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation composed on B (a no-op for
     matrix types that do not compose "MatMPISBAIJSetPreallocation_C") */
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2275
2276 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2277 /*@
2278 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2279 (block compressed row). For good matrix assembly performance
2280 the user should preallocate the matrix storage by setting the parameters
2281 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2282
2283 Collective
2284
2285 Input Parameters:
2286 + comm - MPI communicator
2287 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2288 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2289 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2290 This value should be the same as the local size used in creating the
2291 y vector for the matrix-vector product y = Ax.
2292 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2293 This value should be the same as the local size used in creating the
2294 x vector for the matrix-vector product y = Ax.
2295 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2296 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2297 . d_nz - number of block nonzeros per block row in diagonal portion of local
2298 submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular portion of the diagonal portion of the local submatrix
          (possibly different for each block row) or `NULL`.
2302 If you plan to factor the matrix you must leave room for the diagonal entry and
2303 set its value even if it is zero.
2304 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2305 submatrix (same for all local rows).
2306 - o_nnz - array containing the number of nonzeros in the various block rows of the
2307 off-diagonal portion of the local submatrix (possibly different for
2308 each block row) or `NULL`.
2309
2310 Output Parameter:
2311 . A - the matrix
2312
2313 Options Database Keys:
2314 + -mat_no_unroll - uses code that does not unroll the loops in the
2315 block calculations (much slower)
2316 . -mat_block_size - size of the blocks to use
2317 - -mat_mpi - use the parallel matrix data structures even on one processor
2318 (defaults to using SeqBAIJ format on one processor)
2319
2320 Level: intermediate
2321
2322 Notes:
2323 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2324 MatXXXXSetPreallocation() paradigm instead of this routine directly.
2325 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2326
2327 The number of rows and columns must be divisible by blocksize.
2328 This matrix type does not support complex Hermitian operation.
2329
2330 The user MUST specify either the local or global matrix dimensions
2331 (possibly both).
2332
2333 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2334 than it must be used on all processors that share the object for that argument.
2335
2336 If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by
2337 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2338
2339 If the *_nnz parameter is given then the *_nz parameter is ignored
2340
2341 Storage Information:
2342 For a square global matrix we define each processor's diagonal portion
2343 to be its local rows and the corresponding columns (a square submatrix);
2344 each processor's off-diagonal portion encompasses the remainder of the
2345 local matrix (a rectangular submatrix).
2346
2347 The user can specify preallocated storage for the diagonal part of
2348 the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2349 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2350 memory allocation. Likewise, specify preallocated storage for the
2351 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2352
2353 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2354 the figure below we depict these three local rows and all columns (0-11).
2355
2356 .vb
2357 0 1 2 3 4 5 6 7 8 9 10 11
2358 --------------------------
2359 row 3 |. . . d d d o o o o o o
2360 row 4 |. . . d d d o o o o o o
2361 row 5 |. . . d d d o o o o o o
2362 --------------------------
2363 .ve
2364
2365 Thus, any entries in the d locations are stored in the d (diagonal)
2366 submatrix, and any entries in the o locations are stored in the
2367 o (off-diagonal) submatrix. Note that the d matrix is stored in
2368 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2369
2370 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2371 plus the diagonal part of the d matrix,
2372 and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2373 In general, for PDE problems in which most nonzeros are near the diagonal,
2374 one expects `d_nz` >> `o_nz`.
2375
2376 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2377 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2378 @*/
MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)2379 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2380 {
2381 PetscMPIInt size;
2382
2383 PetscFunctionBegin;
2384 PetscCall(MatCreate(comm, A));
2385 PetscCall(MatSetSizes(*A, m, n, M, N));
2386 PetscCallMPI(MPI_Comm_size(comm, &size));
2387 if (size > 1) {
2388 PetscCall(MatSetType(*A, MATMPISBAIJ));
2389 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2390 } else {
2391 PetscCall(MatSetType(*A, MATSEQSBAIJ));
2392 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2393 }
2394 PetscFunctionReturn(PETSC_SUCCESS);
2395 }
2396
/* Duplicate an MPISBAIJ matrix: replicate the parallel layout, the
   communication machinery (scatters and split work vectors), and the
   diagonal (A, SeqSBAIJ) and off-diagonal (B, SeqBAIJ) blocks of matin.
   Numerical values are copied or not according to cpvalues. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  /* share (reference-count) the row/column layouts instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (matin->hash_active) {
    /* matin is still collecting entries in a hash table, so there are no
       assembled data structures to copy; just set the new matrix up */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled = PETSC_TRUE;
    mat->insertmode = NOT_SET_VALUES;

    a = (Mat_MPISBAIJ *)mat->data;
    /* block-size and blocked-dimension bookkeeping */
    a->bs2 = oldmat->bs2;
    a->mbs = oldmat->mbs;
    a->nbs = oldmat->nbs;
    a->Mbs = oldmat->Mbs;
    a->Nbs = oldmat->Nbs;

    a->size = oldmat->size;
    a->rank = oldmat->rank;
    a->donotstash = oldmat->donotstash;
    a->roworiented = oldmat->roworiented;
    /* per-call MatGetRow()/MatSetValues() scratch space is created lazily */
    a->rowindices = NULL;
    a->rowvalues = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray = NULL;
    a->rstartbs = oldmat->rstartbs;
    a->rendbs = oldmat->rendbs;
    a->cstartbs = oldmat->cstartbs;
    a->cendbs = oldmat->cendbs;

    /* hash table stuff */
    a->ht = NULL;
    a->hd = NULL;
    a->ht_size = 0;
    a->ht_flag = oldmat->ht_flag;
    a->ht_fact = oldmat->ht_fact;
    a->ht_total_ct = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* garray maps B's compressed block columns back to global block columns */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
    PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

    /* slvec1a/slvec1b (and slvec0b) are views aliasing the leading/trailing
       portions of slvec1's (slvec0's) storage; recreate those views on the
       freshly duplicated vectors */
    PetscCall(VecGetLocalSize(a->slvec1, &nt));
    PetscCall(VecGetArray(a->slvec1, &array));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
    PetscCall(VecRestoreArray(a->slvec1, &array));
    PetscCall(VecGetArray(a->slvec0, &array));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
    PetscCall(VecRestoreArray(a->slvec0, &array));

    /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
    PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
    a->sMvctx = oldmat->sMvctx;

    /* duplicate the sequential diagonal (SBAIJ) and off-diagonal (BAIJ) blocks */
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2492
2493 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2494 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2495
MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)2496 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2497 {
2498 PetscBool isbinary;
2499
2500 PetscFunctionBegin;
2501 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2502 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2503 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2504 PetscFunctionReturn(PETSC_SUCCESS);
2505 }
2506
/* Compute v[row] = max_col |A(row,col)| for a parallel SBAIJ matrix.

   Only the upper triangle is stored: the diagonal block a->A yields local
   row maxima directly, but each stored off-diagonal entry also contributes
   to a row owned by a higher-rank process.  Each process accumulates those
   contributions in a global-length work array and sends the relevant
   segments to the higher-rank owners, which fold them (component-wise max)
   into their local results.

   Returning the column index of the maximum (idx) is not supported. */
static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data;
  PetscReal atmp;
  PetscReal *work, *svalues, *rvalues;
  PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt rank, size;
  PetscInt *rowners_bs, count, source;
  PetscScalar *va;
  MatScalar *ba;
  MPI_Status stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  /* start from the row maxima of the (symmetric) diagonal block */
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba = b->a;
  bi = b->i;
  bj = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B */
  if (rank != size - 1) {
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;           /* local col index */
          col += rowners_bs[rank + 1]; /* global col index */
          /* NOTE(review): this offset assumes B's column numbering starts
             directly after this rank's diagonal block — confirm against
             a->garray compression */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values: one message from every lower rank; arrival order does
     not matter since only component-wise maxima are taken */
  if (rank) {
    rvalues = work;
    count = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2589
/* SOR/SSOR relaxation for MPISBAIJ matrices (block size 1 only).

   Because only the upper triangle is stored, the action of the strictly
   lower triangular off-diagonal part is obtained as B^T*x through the
   sMvctx scatter on the split work vectors slvec0 = [x ; B^T x] and
   slvec1 = [bb ; 0]; the actual relaxation sweeps are delegated to the
   diagonal block mat->A. */
static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar *x, *ptr, *from;
  Vec bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* purely local: apply the upper triangular part via the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x = 0 the off-process contribution vanishes: do one local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
      PetscCall(VecZeroEntries(mat->slvec1b));

      /* scatter-add -[xx ; B^T xx] into [bb ; 0]: slvec1a accumulates the
         lower-triangle contributions, slvec1b the needed ghost values of -xx */
      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: one backward sweep, a diagonal scaling, the
       off-diagonal multiply, then one forward sweep */
    Vec xx1;
    PetscBool hasop;
    const PetscScalar *diag;
    PetscScalar *sl, scale = (omega - 2.0) / omega;
    PetscInt i, n;

    if (!mat->xx1) {
      /* work vectors are cached on the matrix and reused across calls */
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
        PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
        PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so sl = b - diag.*x (saves one multiply) */
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
    PetscCall(VecZeroEntries(mat->slvec1b));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(PETSC_SUCCESS);
}
2721
2722 /*@
  MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard CSR format.
2724
2725 Collective
2726
2727 Input Parameters:
2728 + comm - MPI communicator
2729 . bs - the block size, only a block size of 1 is supported
2730 . m - number of local rows (Cannot be `PETSC_DECIDE`)
2731 . n - This value should be the same as the local size used in creating the
2732 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
2733 calculated if `N` is given) For square matrices `n` is almost always `m`.
2734 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2735 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2736 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2737 . j - column indices
2738 - a - matrix values
2739
2740 Output Parameter:
2741 . mat - the matrix
2742
2743 Level: intermediate
2744
2745 Notes:
2746 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2747 thus you CANNOT change the matrix entries by changing the values of `a` after you have
2748 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2749
2750 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2751
2752 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2753 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2754 @*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* validate the CSR input before creating anything */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  /* copies i/j/a into the matrix; the caller's arrays are not referenced afterwards */
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2766
2767 /*@
2768 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2769
2770 Collective
2771
2772 Input Parameters:
2773 + B - the matrix
2774 . bs - the block size
2775 . i - the indices into `j` for the start of each local row (indices start with zero)
2776 . j - the column indices for each local row (indices start with zero) these must be sorted for each row
2777 - v - optional values in the matrix, pass `NULL` if not provided
2778
2779 Level: advanced
2780
2781 Notes:
2782 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2783 thus you CANNOT change the matrix entries by changing the values of `v` after you have
2784 called this routine.
2785
2786 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2787 and usually the numerical values as well
2788
2789 Any entries passed in that are below the diagonal are ignored
2790
2791 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2792 `MatCreateMPISBAIJWithArrays()`
2793 @*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation registered as
     "MatMPISBAIJSetPreallocationCSR_C"; PetscTryMethod is a no-op if
     B's type does not provide one. */
  PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2800
/* Stack each process's sequential SBAIJ matrix inmat on top of one another
   (rank 0's rows first, then rank 1's, ...) to form the parallel SBAIJ
   matrix *outmat.

   scall == MAT_INITIAL_MATRIX runs the symbolic phase (preallocation of
   *outmat from the block structure of inmat); the numeric phase then inserts
   the values row by row.  n is the local column size of *outmat, or
   PETSC_DECIDE to let PETSc split it. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
    PetscInt *dnz, *onz, mbs, Nbs, nbs;
    PetscInt *bindx, rmax = a->rmax, j;
    PetscMPIInt rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx)); /* scratch for one row of block-column indices */
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      /* convert point column indices to block column indices */
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATSBAIJ));
    /* only one of these takes effect, depending on whether *outmat resolved
       to the sequential or the MPI implementation */
    PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* global row index in *outmat */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2868