1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10
11 PetscFunctionBegin;
12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13 PetscCall(MatStashDestroy_Private(&mat->stash));
14 PetscCall(MatStashDestroy_Private(&mat->bstash));
15 PetscCall(MatDestroy(&baij->A));
16 PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18 PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20 PetscCall(PetscFree(baij->colmap));
21 #endif
22 PetscCall(PetscFree(baij->garray));
23 PetscCall(VecDestroy(&baij->lvec));
24 PetscCall(VecScatterDestroy(&baij->Mvctx));
25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26 PetscCall(PetscFree(baij->barray));
27 PetscCall(PetscFree2(baij->hd, baij->ht));
28 PetscCall(PetscFree(baij->rangebs));
29 PetscCall(PetscFree(mat->data));
30
31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45 PetscFunctionReturn(PETSC_SUCCESS);
46 }
47
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
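/* The included header is written generically; defining TYPE before including it instantiates the
   BAIJ-specific pieces of this hash-based assembly path (an assumption based on the TYPE define
   below: the same header is reused, with other TYPE values, by the other MPI matrix formats). */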
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61 PetscScalar *vv;
62 Vec vB, vA;
63 const PetscScalar *va, *vb;
64
65 PetscFunctionBegin;
66 PetscCall(MatCreateVecs(a->A, NULL, &vA));
67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68
69 PetscCall(VecGetArrayRead(vA, &va));
70 if (idx) {
71 for (i = 0; i < m; i++) {
72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73 }
74 }
75
76 PetscCall(MatCreateVecs(a->B, NULL, &vB));
77 PetscCall(PetscMalloc1(m, &idxb));
78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
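  /* the indices returned for the off-diagonal part B are local column indices in B's compressed
     numbering; below they are translated to global point columns through garray as
     bs * garray[idxb[i] / bs] + (idxb[i] % bs) */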
79
80 PetscCall(VecGetArrayWrite(v, &vv));
81 PetscCall(VecGetArrayRead(vB, &vb));
82 for (i = 0; i < m; i++) {
83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84 vv[i] = vb[i];
85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86 } else {
87 vv[i] = va[i];
88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89 }
90 }
91 PetscCall(VecRestoreArrayWrite(v, &vv));
92 PetscCall(VecRestoreArrayRead(vA, &va));
93 PetscCall(VecRestoreArrayRead(vB, &vb));
94 PetscCall(PetscFree(idxb));
95 PetscCall(VecDestroy(&vA));
96 PetscCall(VecDestroy(&vB));
97 PetscFunctionReturn(PETSC_SUCCESS);
98 }
99
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103 Vec vB, vA;
104
105 PetscFunctionBegin;
106 PetscCall(MatCreateVecs(a->A, NULL, &vA));
107 PetscCall(MatGetRowSumAbs(a->A, vA));
108 PetscCall(MatCreateVecs(a->B, NULL, &vB));
109 PetscCall(MatGetRowSumAbs(a->B, vB));
110 PetscCall(VecAXPY(vA, 1.0, vB));
111 PetscCall(VecDestroy(&vB));
112 PetscCall(VecCopy(vA, v));
113 PetscCall(VecDestroy(&vA));
114 PetscFunctionReturn(PETSC_SUCCESS);
115 }
116
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120
121 PetscFunctionBegin;
122 PetscCall(MatStoreValues(aij->A));
123 PetscCall(MatStoreValues(aij->B));
124 PetscFunctionReturn(PETSC_SUCCESS);
125 }
126
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130
131 PetscFunctionBegin;
132 PetscCall(MatRetrieveValues(aij->A));
133 PetscCall(MatRetrieveValues(aij->B));
134 PetscFunctionReturn(PETSC_SUCCESS);
135 }
136
137 /*
138 Local utility routine that creates a mapping from the global column
139 number to the local number in the off-diagonal part of the local
140   storage of the matrix. This is done in a non-scalable way since the
141   length of colmap equals the number of block columns in the global matrix.
142 */
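/*
   Note: colmap stores 1 + bs * (local block column) for every global block column present in the
   off-diagonal part B, so 0 (the PetscCalloc1()/hash-map default) means "block column not present";
   callers subtract 1 after the lookup.
*/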
143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
144 {
145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data;
147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs;
148
149 PetscFunctionBegin;
150 #if defined(PETSC_USE_CTABLE)
151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
153 #else
154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
156 #endif
157 PetscFunctionReturn(PETSC_SUCCESS);
158 }
159
160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
161 do { \
162 brow = row / bs; \
163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \
164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
165 rmax = aimax[brow]; \
166 nrow = ailen[brow]; \
167 bcol = col / bs; \
168 ridx = row % bs; \
169 cidx = col % bs; \
170 low = 0; \
171 high = nrow; \
172 while (high - low > 3) { \
173 t = (low + high) / 2; \
174 if (rp[t] > bcol) high = t; \
175 else low = t; \
176 } \
177 for (_i = low; _i < high; _i++) { \
178 if (rp[_i] > bcol) break; \
179 if (rp[_i] == bcol) { \
180 bap = ap + bs2 * _i + bs * cidx + ridx; \
181 if (addv == ADD_VALUES) *bap += value; \
182 else *bap = value; \
183 goto a_noinsert; \
184 } \
185 } \
186 if (a->nonew == 1) goto a_noinsert; \
187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
189 N = nrow++ - 1; \
190 /* shift up all the later entries in this row */ \
191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
194 rp[_i] = bcol; \
195 ap[bs2 * _i + bs * cidx + ridx] = value; \
196 a_noinsert:; \
197 ailen[brow] = nrow; \
198 } while (0)
199
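/* MatSetValues_SeqBAIJ_A_Private() (above) and MatSetValues_SeqBAIJ_B_Private() (below) insert a
   single scalar into the diagonal and off-diagonal sequential blocks respectively: a short
   bisection narrows the search within the block row, a linear scan locates (or inserts) the block
   column, and the scalar lands at offset bs * cidx + ridx inside its bs x bs block. */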
200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
201 do { \
202 brow = row / bs; \
203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \
204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
205 rmax = bimax[brow]; \
206 nrow = bilen[brow]; \
207 bcol = col / bs; \
208 ridx = row % bs; \
209 cidx = col % bs; \
210 low = 0; \
211 high = nrow; \
212 while (high - low > 3) { \
213 t = (low + high) / 2; \
214 if (rp[t] > bcol) high = t; \
215 else low = t; \
216 } \
217 for (_i = low; _i < high; _i++) { \
218 if (rp[_i] > bcol) break; \
219 if (rp[_i] == bcol) { \
220 bap = ap + bs2 * _i + bs * cidx + ridx; \
221 if (addv == ADD_VALUES) *bap += value; \
222 else *bap = value; \
223 goto b_noinsert; \
224 } \
225 } \
226 if (b->nonew == 1) goto b_noinsert; \
227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
229 N = nrow++ - 1; \
230 /* shift up all the later entries in this row */ \
231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
234 rp[_i] = bcol; \
235 ap[bs2 * _i + bs * cidx + ridx] = value; \
236 b_noinsert:; \
237 bilen[brow] = nrow; \
238 } while (0)
239
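/*
   Typical user-level assembly that ends up in MatSetValues_MPIBAIJ() (illustrative sketch only,
   error checking omitted; the sizes and values are made up):

     Mat         A;
     PetscInt    row = 0, col = 0;
     PetscScalar val = 1.0;

     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
     MatSetType(A, MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL);
     MatSetValues(A, 1, &row, 1, &col, &val, INSERT_VALUES);
     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
*/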
240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
241 {
242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
243 MatScalar value;
244 PetscBool roworiented = baij->roworiented;
245 PetscInt i, j, row, col;
246 PetscInt rstart_orig = mat->rmap->rstart;
247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
249
250 /* Some Variables required in the macro */
251 Mat A = baij->A;
252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
254 MatScalar *aa = a->a;
255
256 Mat B = baij->B;
257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
259 MatScalar *ba = b->a;
260
261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
263 MatScalar *ap, *bap;
264
265 PetscFunctionBegin;
266 for (i = 0; i < m; i++) {
267 if (im[i] < 0) continue;
268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
269 if (im[i] >= rstart_orig && im[i] < rend_orig) {
270 row = im[i] - rstart_orig;
271 for (j = 0; j < n; j++) {
272 if (in[j] >= cstart_orig && in[j] < cend_orig) {
273 col = in[j] - cstart_orig;
274 if (roworiented) value = v[i * n + j];
275 else value = v[i + j * m];
276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
277 } else if (in[j] < 0) {
278 continue;
279 } else {
280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
281 if (mat->was_assembled) {
282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
283 #if defined(PETSC_USE_CTABLE)
284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
285 col = col - 1;
286 #else
287 col = baij->colmap[in[j] / bs] - 1;
288 #endif
289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
290 PetscCall(MatDisAssemble_MPIBAIJ(mat));
291 col = in[j];
292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
293 B = baij->B;
294 b = (Mat_SeqBAIJ *)B->data;
295 bimax = b->imax;
296 bi = b->i;
297 bilen = b->ilen;
298 bj = b->j;
299 ba = b->a;
300 } else {
301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
302 col += in[j] % bs;
303 }
304 } else col = in[j];
305 if (roworiented) value = v[i * n + j];
306 else value = v[i + j * m];
307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
309 }
310 }
311 } else {
312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
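      /* off-process row: stash the values locally; they are sent to the owning process and
         inserted during MatAssemblyBegin()/MatAssemblyEnd() */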
313 if (!baij->donotstash) {
314 mat->assembled = PETSC_FALSE;
315 if (roworiented) {
316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
317 } else {
318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
319 }
320 }
321 }
322 }
323 PetscFunctionReturn(PETSC_SUCCESS);
324 }
325
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332 PetscBool roworiented = a->roworiented;
333 const PetscScalar *value = v;
334 MatScalar *ap, *aa = a->a, *bap;
335
336 PetscFunctionBegin;
337 rp = aj + ai[row];
338 ap = aa + bs2 * ai[row];
339 rmax = imax[row];
340 nrow = ailen[row];
341 value = v;
342 low = 0;
343 high = nrow;
344 while (high - low > 7) {
345 t = (low + high) / 2;
346 if (rp[t] > col) high = t;
347 else low = t;
348 }
349 for (i = low; i < high; i++) {
350 if (rp[i] > col) break;
351 if (rp[i] == col) {
352 bap = ap + bs2 * i;
353 if (roworiented) {
354 if (is == ADD_VALUES) {
355 for (ii = 0; ii < bs; ii++) {
356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357 }
358 } else {
359 for (ii = 0; ii < bs; ii++) {
360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361 }
362 }
363 } else {
364 if (is == ADD_VALUES) {
365 for (ii = 0; ii < bs; ii++, value += bs) {
366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367 bap += bs;
368 }
369 } else {
370 for (ii = 0; ii < bs; ii++, value += bs) {
371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372 bap += bs;
373 }
374 }
375 }
376 goto noinsert2;
377 }
378 }
379 if (nonew == 1) goto noinsert2;
380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382 N = nrow++ - 1;
383 high++;
384 /* shift up all the later entries in this row */
385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387 rp[i] = col;
388 bap = ap + bs2 * i;
389 if (roworiented) {
390 for (ii = 0; ii < bs; ii++) {
391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392 }
393 } else {
394 for (ii = 0; ii < bs; ii++) {
395 for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396 }
397 }
398 noinsert2:;
399 ailen[row] = nrow;
400 PetscFunctionReturn(PETSC_SUCCESS);
401 }
402
403 /*
404     This routine could be optimized to avoid the block copy at "** Here a copy is required **" below
405     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine.
406 */
407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
408 {
409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
410 const PetscScalar *value;
411 MatScalar *barray = baij->barray;
412 PetscBool roworiented = baij->roworiented;
413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
416
417 PetscFunctionBegin;
418 if (!barray) {
419 PetscCall(PetscMalloc1(bs2, &barray));
420 baij->barray = barray;
421 }
422
423 if (roworiented) stepval = (n - 1) * bs;
424 else stepval = (m - 1) * bs;
425
426 for (i = 0; i < m; i++) {
427 if (im[i] < 0) continue;
428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
429 if (im[i] >= rstart && im[i] < rend) {
430 row = im[i] - rstart;
431 for (j = 0; j < n; j++) {
432 /* If only one block column (row-oriented) or one block row (column-oriented) is passed, the block is already contiguous and a copy is not required */
433 if ((roworiented) && (n == 1)) {
434 barray = (MatScalar *)v + i * bs2;
435 } else if ((!roworiented) && (m == 1)) {
436 barray = (MatScalar *)v + j * bs2;
437 } else { /* Here a copy is required */
438 if (roworiented) {
439 value = v + (i * (stepval + bs) + j) * bs;
440 } else {
441 value = v + (j * (stepval + bs) + i) * bs;
442 }
443 for (ii = 0; ii < bs; ii++, value += bs + stepval) {
444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
445 barray += bs;
446 }
447 barray -= bs2;
448 }
449
450 if (in[j] >= cstart && in[j] < cend) {
451 col = in[j] - cstart;
452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
453 } else if (in[j] < 0) {
454 continue;
455 } else {
456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
457 if (mat->was_assembled) {
458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
459
460 #if defined(PETSC_USE_CTABLE)
461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
462 col = col < 1 ? -1 : (col - 1) / bs;
463 #else
464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
465 #endif
466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
467 PetscCall(MatDisAssemble_MPIBAIJ(mat));
468 col = in[j];
469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
470 } else col = in[j];
471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
472 }
473 }
474 } else {
475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
476 if (!baij->donotstash) {
477 if (roworiented) {
478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
479 } else {
480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
481 }
482 }
483 }
484 }
485 PetscFunctionReturn(PETSC_SUCCESS);
486 }
487
488 #define HASH_KEY 0.6180339887
489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
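/* Multiplicative hashing: HASH_KEY is the golden-ratio conjugate (sqrt(5) - 1)/2, so
   HASH(size, key, tmp) evaluates to floor(size * frac(key * HASH_KEY)), which spreads consecutive
   keys fairly uniformly over the table; for example size = 11, key = 7 gives
   frac(7 * 0.618034) = 0.32624 and slot floor(11 * 0.32624) = 3. Collisions are resolved by the
   linear probing done in the routines below and in MatCreateHashTable_MPIBAIJ_Private(). */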
492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
493 {
494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
495 PetscBool roworiented = baij->roworiented;
496 PetscInt i, j, row, col;
497 PetscInt rstart_orig = mat->rmap->rstart;
498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
500 PetscReal tmp;
501 MatScalar **HD = baij->hd, value;
502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
503
504 PetscFunctionBegin;
505 for (i = 0; i < m; i++) {
506 if (PetscDefined(USE_DEBUG)) {
507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
509 }
510 row = im[i];
511 if (row >= rstart_orig && row < rend_orig) {
512 for (j = 0; j < n; j++) {
513 col = in[j];
514 if (roworiented) value = v[i * n + j];
515 else value = v[i + j * m];
516 /* Look up into the Hash Table */
517 key = (row / bs) * Nbs + (col / bs) + 1;
518 h1 = HASH(size, key, tmp);
519
520 idx = h1;
521 if (PetscDefined(USE_DEBUG)) {
522 insert_ct++;
523 total_ct++;
524 if (HT[idx] != key) {
525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
526 if (idx == size) {
527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
529 }
530 }
531 } else if (HT[idx] != key) {
532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
533 if (idx == size) {
534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
536 }
537 }
538 /* A HASH table entry is found, so insert the values at the correct address */
539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
541 }
542 } else if (!baij->donotstash) {
543 if (roworiented) {
544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
545 } else {
546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
547 }
548 }
549 }
550 if (PetscDefined(USE_DEBUG)) {
551 baij->ht_total_ct += total_ct;
552 baij->ht_insert_ct += insert_ct;
553 }
554 PetscFunctionReturn(PETSC_SUCCESS);
555 }
556
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
560 PetscBool roworiented = baij->roworiented;
561 PetscInt i, j, ii, jj, row, col;
562 PetscInt rstart = baij->rstartbs;
563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565 PetscReal tmp;
566 MatScalar **HD = baij->hd, *baij_a;
567 const PetscScalar *v_t, *value;
568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569
570 PetscFunctionBegin;
571 if (roworiented) stepval = (n - 1) * bs;
572 else stepval = (m - 1) * bs;
573
574 for (i = 0; i < m; i++) {
575 if (PetscDefined(USE_DEBUG)) {
576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578 }
579 row = im[i];
580 v_t = v + i * nbs2;
581 if (row >= rstart && row < rend) {
582 for (j = 0; j < n; j++) {
583 col = in[j];
584
585 /* Look up into the Hash Table */
586 key = row * Nbs + col + 1;
587 h1 = HASH(size, key, tmp);
588
589 idx = h1;
590 if (PetscDefined(USE_DEBUG)) {
591 total_ct++;
592 insert_ct++;
593 if (HT[idx] != key) {
594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595 if (idx == size) {
596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598 }
599 }
600 } else if (HT[idx] != key) {
601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602 if (idx == size) {
603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605 }
606 }
607 baij_a = HD[idx];
608 if (roworiented) {
609 /*value = v + i*(stepval+bs)*bs + j*bs;*/
610 /* value = v + (i*(stepval+bs)+j)*bs; */
611 value = v_t;
612 v_t += bs;
613 if (addv == ADD_VALUES) {
614 for (ii = 0; ii < bs; ii++, value += stepval) {
615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616 }
617 } else {
618 for (ii = 0; ii < bs; ii++, value += stepval) {
619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620 }
621 }
622 } else {
623 value = v + j * (stepval + bs) * bs + i * bs;
624 if (addv == ADD_VALUES) {
625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627 }
628 } else {
629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631 }
632 }
633 }
634 }
635 } else {
636 if (!baij->donotstash) {
637 if (roworiented) {
638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639 } else {
640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641 }
642 }
643 }
644 }
645 if (PetscDefined(USE_DEBUG)) {
646 baij->ht_total_ct += total_ct;
647 baij->ht_insert_ct += insert_ct;
648 }
649 PetscFunctionReturn(PETSC_SUCCESS);
650 }
651
652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
653 {
654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
657
658 PetscFunctionBegin;
659 for (i = 0; i < m; i++) {
660 if (idxm[i] < 0) continue; /* negative row */
661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
662 PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
663 row = idxm[i] - bsrstart;
664 for (j = 0; j < n; j++) {
665 if (idxn[j] < 0) continue; /* negative column */
666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
667 if (idxn[j] >= bscstart && idxn[j] < bscend) {
668 col = idxn[j] - bscstart;
669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
670 } else {
671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
672 #if defined(PETSC_USE_CTABLE)
673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
674 data--;
675 #else
676 data = baij->colmap[idxn[j] / bs] - 1;
677 #endif
678 if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
679 else {
680 col = data + idxn[j] % bs;
681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
682 }
683 }
684 }
685 }
686 PetscFunctionReturn(PETSC_SUCCESS);
687 }
688
689 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
690 {
691 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
692 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
693 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
694 PetscReal sum = 0.0;
695 MatScalar *v;
696
697 PetscFunctionBegin;
698 if (baij->size == 1) {
699 PetscCall(MatNorm(baij->A, type, nrm));
700 } else {
701 if (type == NORM_FROBENIUS) {
702 v = amat->a;
703 nz = amat->nz * bs2;
704 for (i = 0; i < nz; i++) {
705 sum += PetscRealPart(PetscConj(*v) * (*v));
706 v++;
707 }
708 v = bmat->a;
709 nz = bmat->nz * bs2;
710 for (i = 0; i < nz; i++) {
711 sum += PetscRealPart(PetscConj(*v) * (*v));
712 v++;
713 }
714 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
715 *nrm = PetscSqrtReal(*nrm);
716 } else if (type == NORM_1) { /* max column sum */
717 Vec col, bcol;
718 PetscScalar *array;
719 PetscInt *jj, *garray = baij->garray;
720
721 PetscCall(MatCreateVecs(mat, &col, NULL));
722 PetscCall(VecSet(col, 0.0));
723 PetscCall(VecGetArrayWrite(col, &array));
724 v = amat->a;
725 jj = amat->j;
726 for (i = 0; i < amat->nz; i++) {
727 for (j = 0; j < bs; j++) {
728 PetscInt col = bs * *jj + j; /* column index */
729
730 for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
731 }
732 jj++;
733 }
734 PetscCall(VecRestoreArrayWrite(col, &array));
735 PetscCall(MatCreateVecs(baij->B, &bcol, NULL));
736 PetscCall(VecSet(bcol, 0.0));
737 PetscCall(VecGetArrayWrite(bcol, &array));
738 v = bmat->a;
739 jj = bmat->j;
740 for (i = 0; i < bmat->nz; i++) {
741 for (j = 0; j < bs; j++) {
742 PetscInt col = bs * *jj + j; /* column index */
743
744 for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
745 }
746 jj++;
747 }
748 PetscCall(VecSetValuesBlocked(col, bmat->nbs, garray, array, ADD_VALUES));
749 PetscCall(VecRestoreArrayWrite(bcol, &array));
750 PetscCall(VecDestroy(&bcol));
751 PetscCall(VecAssemblyBegin(col));
752 PetscCall(VecAssemblyEnd(col));
753 PetscCall(VecNorm(col, NORM_INFINITY, nrm));
754 PetscCall(VecDestroy(&col));
755 } else if (type == NORM_INFINITY) { /* max row sum */
756 PetscReal *sums;
757 PetscCall(PetscMalloc1(bs, &sums));
758 sum = 0.0;
759 for (j = 0; j < amat->mbs; j++) {
760 for (row = 0; row < bs; row++) sums[row] = 0.0;
761 v = amat->a + bs2 * amat->i[j];
762 nz = amat->i[j + 1] - amat->i[j];
763 for (i = 0; i < nz; i++) {
764 for (col = 0; col < bs; col++) {
765 for (row = 0; row < bs; row++) {
766 sums[row] += PetscAbsScalar(*v);
767 v++;
768 }
769 }
770 }
771 v = bmat->a + bs2 * bmat->i[j];
772 nz = bmat->i[j + 1] - bmat->i[j];
773 for (i = 0; i < nz; i++) {
774 for (col = 0; col < bs; col++) {
775 for (row = 0; row < bs; row++) {
776 sums[row] += PetscAbsScalar(*v);
777 v++;
778 }
779 }
780 }
781 for (row = 0; row < bs; row++) {
782 if (sums[row] > sum) sum = sums[row];
783 }
784 }
785 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
786 PetscCall(PetscFree(sums));
787 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
788 }
789 PetscFunctionReturn(PETSC_SUCCESS);
790 }
791
792 /*
793   Creates and fills the hash table.
794 This table is created only once.
795   If new nonzero locations need to be added to the matrix
796 then the hash table has to be destroyed and
797 recreated.
798 */
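/*
   The table uses open addressing with linear probing: HT[] holds the key
   (blockrow * Nbs + blockcol + 1) and HD[] holds the address of the corresponding bs2-sized block
   of values inside A or B, so the *_HT MatSetValues routines can write straight into the
   preallocated storage without searching the row.
*/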
799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802 Mat A = baij->A, B = baij->B;
803 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807 PetscInt *HT, key;
808 MatScalar **HD;
809 PetscReal tmp;
810 #if defined(PETSC_USE_INFO)
811 PetscInt ct = 0, max = 0;
812 #endif
813
814 PetscFunctionBegin;
815 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816
817 baij->ht_size = (PetscInt)(factor * nz);
818 ht_size = baij->ht_size;
819
820 /* Allocate Memory for Hash Table */
821 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822 HD = baij->hd;
823 HT = baij->ht;
824
825 /* Loop Over A */
826 for (i = 0; i < a->mbs; i++) {
827 for (j = ai[i]; j < ai[i + 1]; j++) {
828 row = i + rstart;
829 col = aj[j] + cstart;
830
831 key = row * Nbs + col + 1;
832 h1 = HASH(ht_size, key, tmp);
833 for (k = 0; k < ht_size; k++) {
834 if (!HT[(h1 + k) % ht_size]) {
835 HT[(h1 + k) % ht_size] = key;
836 HD[(h1 + k) % ht_size] = a->a + j * bs2;
837 break;
838 #if defined(PETSC_USE_INFO)
839 } else {
840 ct++;
841 #endif
842 }
843 }
844 #if defined(PETSC_USE_INFO)
845 if (k > max) max = k;
846 #endif
847 }
848 }
849 /* Loop Over B */
850 for (i = 0; i < b->mbs; i++) {
851 for (j = bi[i]; j < bi[i + 1]; j++) {
852 row = i + rstart;
853 col = garray[bj[j]];
854 key = row * Nbs + col + 1;
855 h1 = HASH(ht_size, key, tmp);
856 for (k = 0; k < ht_size; k++) {
857 if (!HT[(h1 + k) % ht_size]) {
858 HT[(h1 + k) % ht_size] = key;
859 HD[(h1 + k) % ht_size] = b->a + j * bs2;
860 break;
861 #if defined(PETSC_USE_INFO)
862 } else {
863 ct++;
864 #endif
865 }
866 }
867 #if defined(PETSC_USE_INFO)
868 if (k > max) max = k;
869 #endif
870 }
871 }
872
873 /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875 for (i = 0, j = 0; i < ht_size; i++) {
876 if (HT[i]) j++;
877 }
878 PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
879 #endif
880 PetscFunctionReturn(PETSC_SUCCESS);
881 }
882
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886 PetscInt nstash, reallocs;
887
888 PetscFunctionBegin;
889 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890
891 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897 PetscFunctionReturn(PETSC_SUCCESS);
898 }
899
900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
901 {
902 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data;
904 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
905 PetscInt *row, *col;
906 PetscBool r1, r2, r3, all_assembled;
907 MatScalar *val;
908 PetscMPIInt n;
909
910 PetscFunctionBegin;
911 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
912 if (!baij->donotstash && !mat->nooffprocentries) {
913 while (1) {
914 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
915 if (!flg) break;
916
917 for (i = 0; i < n;) {
918 /* Now identify the consecutive vals belonging to the same row */
919 for (j = i, rstart = row[j]; j < n; j++) {
920 if (row[j] != rstart) break;
921 }
922 if (j < n) ncols = j - i;
923 else ncols = n - i;
924 /* Now assemble all these values with a single function call */
925 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
926 i = j;
927 }
928 }
929 PetscCall(MatStashScatterEnd_Private(&mat->stash));
930 /* Now process the block-stash. Since the values are stashed column-oriented,
931    clear the row-oriented flags before calling MatSetValuesBlocked(), and afterwards
932    restore the original flags */
933 r1 = baij->roworiented;
934 r2 = a->roworiented;
935 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
936
937 baij->roworiented = PETSC_FALSE;
938 a->roworiented = PETSC_FALSE;
939 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
940 while (1) {
941 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
942 if (!flg) break;
943
944 for (i = 0; i < n;) {
945 /* Now identify the consecutive vals belonging to the same row */
946 for (j = i, rstart = row[j]; j < n; j++) {
947 if (row[j] != rstart) break;
948 }
949 if (j < n) ncols = j - i;
950 else ncols = n - i;
951 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
952 i = j;
953 }
954 }
955 PetscCall(MatStashScatterEnd_Private(&mat->bstash));
956
957 baij->roworiented = r1;
958 a->roworiented = r2;
959 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
960 }
961
962 PetscCall(MatAssemblyBegin(baij->A, mode));
963 PetscCall(MatAssemblyEnd(baij->A, mode));
964
965 /* determine if any process has disassembled, if so we must
966 also disassemble ourselves, in order that we may reassemble. */
967 /*
968 if nonzero structure of submatrix B cannot change then we know that
969 no process disassembled thus we can skip this stuff
970 */
971 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
972 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
973 if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
974 }
975
976 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
977 PetscCall(MatAssemblyBegin(baij->B, mode));
978 PetscCall(MatAssemblyEnd(baij->B, mode));
979
980 #if defined(PETSC_USE_INFO)
981 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
982 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
983
984 baij->ht_total_ct = 0;
985 baij->ht_insert_ct = 0;
986 }
987 #endif
988 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
989 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
990
991 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
992 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
993 }
994
995 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
996
997 baij->rowvalues = NULL;
998
999 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1000 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
1001 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1002 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1003 }
1004 PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006
1007 #include <petscdraw.h>
1008 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1009 {
1010 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1011 PetscMPIInt rank = baij->rank;
1012 PetscInt bs = mat->rmap->bs;
1013 PetscBool isascii, isdraw;
1014 PetscViewer sviewer;
1015 PetscViewerFormat format;
1016
1017 PetscFunctionBegin;
1018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1020 if (isascii) {
1021 PetscCall(PetscViewerGetFormat(viewer, &format));
1022 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1023 MatInfo info;
1024 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1025 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1026 PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1027 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1028 mat->rmap->bs, info.memory));
1029 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1030 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1031 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1033 PetscCall(PetscViewerFlush(viewer));
1034 PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1035 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1036 PetscCall(VecScatterView(baij->Mvctx, viewer));
1037 PetscFunctionReturn(PETSC_SUCCESS);
1038 } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_FACTOR_INFO) PetscFunctionReturn(PETSC_SUCCESS);
1039 }
1040
1041 if (isdraw) {
1042 PetscDraw draw;
1043 PetscBool isnull;
1044 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1045 PetscCall(PetscDrawIsNull(draw, &isnull));
1046 if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048
1049 {
1050 /* assemble the entire matrix onto first processor. */
1051 Mat A;
1052 Mat_SeqBAIJ *Aloc;
1053 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1054 MatScalar *a;
1055 const char *matname;
1056
1057 /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1058 /* Perhaps this should be the type of mat? */
1059 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1060 if (rank == 0) {
1061 PetscCall(MatSetSizes(A, M, N, M, N));
1062 } else {
1063 PetscCall(MatSetSizes(A, 0, 0, M, N));
1064 }
1065 PetscCall(MatSetType(A, MATMPIBAIJ));
1066 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1067 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1068
1069 /* copy over the A part */
1070 Aloc = (Mat_SeqBAIJ *)baij->A->data;
1071 ai = Aloc->i;
1072 aj = Aloc->j;
1073 a = Aloc->a;
1074 PetscCall(PetscMalloc1(bs, &rvals));
1075
1076 for (i = 0; i < mbs; i++) {
1077 rvals[0] = bs * (baij->rstartbs + i);
1078 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1079 for (j = ai[i]; j < ai[i + 1]; j++) {
1080 col = (baij->cstartbs + aj[j]) * bs;
1081 for (k = 0; k < bs; k++) {
1082 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1083 col++;
1084 a += bs;
1085 }
1086 }
1087 }
1088 /* copy over the B part */
1089 Aloc = (Mat_SeqBAIJ *)baij->B->data;
1090 ai = Aloc->i;
1091 aj = Aloc->j;
1092 a = Aloc->a;
1093 for (i = 0; i < mbs; i++) {
1094 rvals[0] = bs * (baij->rstartbs + i);
1095 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1096 for (j = ai[i]; j < ai[i + 1]; j++) {
1097 col = baij->garray[aj[j]] * bs;
1098 for (k = 0; k < bs; k++) {
1099 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1100 col++;
1101 a += bs;
1102 }
1103 }
1104 }
1105 PetscCall(PetscFree(rvals));
1106 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1107 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1108 /*
1109 Every process has to participate in drawing the matrix since the graphics waits are
1110 synchronized across all processors that share the PetscDraw object
1111 */
1112 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1113 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1114 if (rank == 0) {
1115 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
1116 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
1117 }
1118 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1119 PetscCall(MatDestroy(&A));
1120 }
1121 PetscFunctionReturn(PETSC_SUCCESS);
1122 }
1123
1124 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1125 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1126 {
1127 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
1128 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data;
1129 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data;
1130 const PetscInt *garray = aij->garray;
1131 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1132 PetscCount nz, hnz;
1133 PetscInt *rowlens, *colidxs;
1134 PetscScalar *matvals;
1135 PetscMPIInt rank;
1136
1137 PetscFunctionBegin;
1138 PetscCall(PetscViewerSetUp(viewer));
1139
1140 M = mat->rmap->N;
1141 N = mat->cmap->N;
1142 m = mat->rmap->n;
1143 rs = mat->rmap->rstart;
1144 cs = mat->cmap->rstart;
1145 bs = mat->rmap->bs;
1146 nz = bs * bs * (A->nz + B->nz);
1147
1148 /* write matrix header */
1149 header[0] = MAT_FILE_CLASSID;
1150 header[1] = M;
1151 header[2] = N;
1152 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1153 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1154 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1155 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1156
1157 /* fill in and store row lengths */
1158 PetscCall(PetscMalloc1(m, &rowlens));
1159 for (cnt = 0, i = 0; i < A->mbs; i++)
1160 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1161 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1162 PetscCall(PetscFree(rowlens));
1163
1164 /* fill in and store column indices */
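  /* for each point row the columns are emitted in globally increasing order: the off-diagonal
     blocks that lie before the diagonal block, then the diagonal block, then the remaining
     off-diagonal blocks */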
1165 PetscCall(PetscMalloc1(nz, &colidxs));
1166 for (cnt = 0, i = 0; i < A->mbs; i++) {
1167 for (k = 0; k < bs; k++) {
1168 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1169 if (garray[B->j[jb]] > cs / bs) break;
1170 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1171 }
1172 for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1173 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1174 for (; jb < B->i[i + 1]; jb++)
1175 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1176 }
1177 }
1178 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
1179 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1180 PetscCall(PetscFree(colidxs));
1181
1182 /* fill in and store nonzero values */
1183 PetscCall(PetscMalloc1(nz, &matvals));
1184 for (cnt = 0, i = 0; i < A->mbs; i++) {
1185 for (k = 0; k < bs; k++) {
1186 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1187 if (garray[B->j[jb]] > cs / bs) break;
1188 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1189 }
1190 for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1191 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1192 for (; jb < B->i[i + 1]; jb++)
1193 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1194 }
1195 }
1196 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1197 PetscCall(PetscFree(matvals));
1198
1199 /* write block size option to the viewer's .info file */
1200 PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1201 PetscFunctionReturn(PETSC_SUCCESS);
1202 }
1203
1204 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1205 {
1206 PetscBool isascii, isdraw, issocket, isbinary;
1207
1208 PetscFunctionBegin;
1209 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1210 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1211 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1212 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1213 if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1214 else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1215 PetscFunctionReturn(PETSC_SUCCESS);
1216 }
1217
1218 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1219 {
1220 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1221 PetscInt nt;
1222
1223 PetscFunctionBegin;
1224 PetscCall(VecGetLocalSize(xx, &nt));
1225 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1226 PetscCall(VecGetLocalSize(yy, &nt));
1227 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
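  /* overlap communication with computation: start scattering the needed ghost entries of xx into
     lvec, multiply by the local diagonal block while the messages are in flight, then add the
     off-diagonal contribution B * lvec */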
1228 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1229 PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1230 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1231 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1232 PetscFunctionReturn(PETSC_SUCCESS);
1233 }
1234
1235 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1236 {
1237 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1238
1239 PetscFunctionBegin;
1240 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1241 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1242 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1243 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1244 PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246
1247 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1248 {
1249 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1250
1251 PetscFunctionBegin;
1252 /* do nondiagonal part */
1253 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1254 /* do local part */
1255 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1256 /* add partial results together */
1257 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1258 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1259 PetscFunctionReturn(PETSC_SUCCESS);
1260 }
1261
1262 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1263 {
1264 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1265
1266 PetscFunctionBegin;
1267 /* do nondiagonal part */
1268 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1269 /* do local part */
1270 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1271 /* add partial results together */
1272 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1273 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1274 PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276
1277 /*
1278 This only works correctly for square matrices where the subblock A->A is the
1279 diagonal block
1280 */
1281 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1282 {
1283 PetscFunctionBegin;
1284 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1285 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1286 PetscFunctionReturn(PETSC_SUCCESS);
1287 }
1288
1289 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1290 {
1291 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1292
1293 PetscFunctionBegin;
1294 PetscCall(MatScale(a->A, aa));
1295 PetscCall(MatScale(a->B, aa));
1296 PetscFunctionReturn(PETSC_SUCCESS);
1297 }
1298
1299 static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1300 {
1301 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1302 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1303 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1304 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1305 PetscInt *cmap, *idx_p, cstart = mat->cstartbs;
1306
1307 PetscFunctionBegin;
1308 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1309 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1310 mat->getrowactive = PETSC_TRUE;
1311
1312 if (!mat->rowvalues && (idx || v)) {
1313 /*
1314 allocate enough space to hold information from the longest row.
1315 */
1316 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1317 PetscInt max = 1, mbs = mat->mbs, tmp;
1318 for (i = 0; i < mbs; i++) {
1319 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1320 if (max < tmp) max = tmp;
1321 }
1322 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1323 }
1324 lrow = row - brstart;
1325
1326 pvA = &vworkA;
1327 pcA = &cworkA;
1328 pvB = &vworkB;
1329 pcB = &cworkB;
1330 if (!v) {
1331 pvA = NULL;
1332 pvB = NULL;
1333 }
1334 if (!idx) {
1335 pcA = NULL;
1336 if (!v) pcB = NULL;
1337 }
1338 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1339 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1340 nztot = nzA + nzB;
1341
1342 cmap = mat->garray;
1343 if (v || idx) {
1344 if (nztot) {
1345 /* Sort by increasing column numbers, assuming A and B already sorted */
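        /* imark counts the B (off-diagonal) entries whose global block column precedes the
           diagonal block; those come first, then all of A, then the remaining B entries */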
1346 PetscInt imark = -1;
1347 if (v) {
1348 *v = v_p = mat->rowvalues;
1349 for (i = 0; i < nzB; i++) {
1350 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1351 else break;
1352 }
1353 imark = i;
1354 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1355 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1356 }
1357 if (idx) {
1358 *idx = idx_p = mat->rowindices;
1359 if (imark > -1) {
1360 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1361 } else {
1362 for (i = 0; i < nzB; i++) {
1363 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1364 else break;
1365 }
1366 imark = i;
1367 }
1368 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1369 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1370 }
1371 } else {
1372 if (idx) *idx = NULL;
1373 if (v) *v = NULL;
1374 }
1375 }
1376 *nz = nztot;
1377 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1378 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1379 PetscFunctionReturn(PETSC_SUCCESS);
1380 }
1381
1382 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1383 {
1384 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1385
1386 PetscFunctionBegin;
1387 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1388 baij->getrowactive = PETSC_FALSE;
1389 PetscFunctionReturn(PETSC_SUCCESS);
1390 }
1391
1392 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1393 {
1394 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1395
1396 PetscFunctionBegin;
1397 PetscCall(MatZeroEntries(l->A));
1398 PetscCall(MatZeroEntries(l->B));
1399 PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401
1402 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1403 {
1404 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data;
1405 Mat A = a->A, B = a->B;
1406 PetscLogDouble isend[5], irecv[5];
1407
1408 PetscFunctionBegin;
1409 info->block_size = (PetscReal)matin->rmap->bs;
1410
1411 PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1412
1413 isend[0] = info->nz_used;
1414 isend[1] = info->nz_allocated;
1415 isend[2] = info->nz_unneeded;
1416 isend[3] = info->memory;
1417 isend[4] = info->mallocs;
1418
1419 PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1420
1421 isend[0] += info->nz_used;
1422 isend[1] += info->nz_allocated;
1423 isend[2] += info->nz_unneeded;
1424 isend[3] += info->memory;
1425 isend[4] += info->mallocs;
1426
1427 if (flag == MAT_LOCAL) {
1428 info->nz_used = isend[0];
1429 info->nz_allocated = isend[1];
1430 info->nz_unneeded = isend[2];
1431 info->memory = isend[3];
1432 info->mallocs = isend[4];
1433 } else if (flag == MAT_GLOBAL_MAX) {
1434 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1435
1436 info->nz_used = irecv[0];
1437 info->nz_allocated = irecv[1];
1438 info->nz_unneeded = irecv[2];
1439 info->memory = irecv[3];
1440 info->mallocs = irecv[4];
1441 } else if (flag == MAT_GLOBAL_SUM) {
1442 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1443
1444 info->nz_used = irecv[0];
1445 info->nz_allocated = irecv[1];
1446 info->nz_unneeded = irecv[2];
1447 info->memory = irecv[3];
1448 info->mallocs = irecv[4];
1449 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1450 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1451 info->fill_ratio_needed = 0;
1452 info->factor_mallocs = 0;
1453 PetscFunctionReturn(PETSC_SUCCESS);
1454 }
1455
1456 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1457 {
1458 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1459
1460 PetscFunctionBegin;
1461 switch (op) {
1462 case MAT_NEW_NONZERO_LOCATIONS:
1463 case MAT_NEW_NONZERO_ALLOCATION_ERR:
1464 case MAT_UNUSED_NONZERO_LOCATION_ERR:
1465 case MAT_KEEP_NONZERO_PATTERN:
1466 case MAT_NEW_NONZERO_LOCATION_ERR:
1467 MatCheckPreallocated(A, 1);
1468 PetscCall(MatSetOption(a->A, op, flg));
1469 PetscCall(MatSetOption(a->B, op, flg));
1470 break;
1471 case MAT_ROW_ORIENTED:
1472 MatCheckPreallocated(A, 1);
1473 a->roworiented = flg;
1474
1475 PetscCall(MatSetOption(a->A, op, flg));
1476 PetscCall(MatSetOption(a->B, op, flg));
1477 break;
1478 case MAT_IGNORE_OFF_PROC_ENTRIES:
1479 a->donotstash = flg;
1480 break;
1481 case MAT_USE_HASH_TABLE:
1482 a->ht_flag = flg;
1483 a->ht_fact = 1.39;
1484 break;
1485 case MAT_SYMMETRIC:
1486 case MAT_STRUCTURALLY_SYMMETRIC:
1487 case MAT_HERMITIAN:
1488 case MAT_SYMMETRY_ETERNAL:
1489 case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1490 case MAT_SPD_ETERNAL:
1491     /* if the diagonal block is square it inherits some of the properties above */
1492 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1493 break;
1494 default:
1495 break;
1496 }
1497 PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499
1500 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1501 {
1502 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1503 Mat_SeqBAIJ *Aloc;
1504 Mat B;
1505 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1506 PetscInt bs = A->rmap->bs, mbs = baij->mbs;
1507 MatScalar *a;
1508
1509 PetscFunctionBegin;
1510 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1511 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1512 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1513 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1514 PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1515 /* Do not know preallocation information, but must set block size */
1516 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1517 } else {
1518 B = *matout;
1519 }
1520
1521 /* copy over the A part */
1522 Aloc = (Mat_SeqBAIJ *)baij->A->data;
1523 ai = Aloc->i;
1524 aj = Aloc->j;
1525 a = Aloc->a;
1526 PetscCall(PetscMalloc1(bs, &rvals));
1527
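  /* rvals[] holds the bs global row indices of block row i of A; since SeqBAIJ blocks are stored
     column-major, each column of every block in that block row is inserted as one row of B,
     building the transpose one block column at a time */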
1528 for (i = 0; i < mbs; i++) {
1529 rvals[0] = bs * (baij->rstartbs + i);
1530 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1531 for (j = ai[i]; j < ai[i + 1]; j++) {
1532 col = (baij->cstartbs + aj[j]) * bs;
1533 for (k = 0; k < bs; k++) {
1534 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1535
1536 col++;
1537 a += bs;
1538 }
1539 }
1540 }
1541 /* copy over the B part */
1542 Aloc = (Mat_SeqBAIJ *)baij->B->data;
1543 ai = Aloc->i;
1544 aj = Aloc->j;
1545 a = Aloc->a;
1546 for (i = 0; i < mbs; i++) {
1547 rvals[0] = bs * (baij->rstartbs + i);
1548 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1549 for (j = ai[i]; j < ai[i + 1]; j++) {
1550 col = baij->garray[aj[j]] * bs;
1551 for (k = 0; k < bs; k++) {
1552 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1553 col++;
1554 a += bs;
1555 }
1556 }
1557 }
1558 PetscCall(PetscFree(rvals));
1559 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1560 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1561
1562 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1563 else PetscCall(MatHeaderMerge(A, &B));
1564 PetscFunctionReturn(PETSC_SUCCESS);
1565 }
1566
1567 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1568 {
1569 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1570 Mat a = baij->A, b = baij->B;
1571 PetscInt s1, s2, s3;
1572
1573 PetscFunctionBegin;
1574 PetscCall(MatGetLocalSize(mat, &s2, &s3));
1575 if (rr) {
1576 PetscCall(VecGetLocalSize(rr, &s1));
1577 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1578 /* Overlap communication with computation. */
1579 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1580 }
1581 if (ll) {
1582 PetscCall(VecGetLocalSize(ll, &s1));
1583 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1584 PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1585 }
1586 /* scale the diagonal block */
1587 PetscUseTypeMethod(a, diagonalscale, ll, rr);
1588
1589 if (rr) {
1590 /* Do a scatter end and then right scale the off-diagonal block */
1591 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1592 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1593 }
1594 PetscFunctionReturn(PETSC_SUCCESS);
1595 }
1596
1597 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1598 {
1599 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1600 PetscInt *lrows;
1601 PetscInt r, len;
1602 PetscBool cong;
1603
1604 PetscFunctionBegin;
1605 /* get locally owned rows */
1606 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1607 /* fix right-hand side if needed */
1608 if (x && b) {
1609 const PetscScalar *xx;
1610 PetscScalar *bb;
1611
1612 PetscCall(VecGetArrayRead(x, &xx));
1613 PetscCall(VecGetArray(b, &bb));
1614 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1615 PetscCall(VecRestoreArrayRead(x, &xx));
1616 PetscCall(VecRestoreArray(b, &bb));
1617 }
1618
1619 /* actually zap the local rows */
1620 /*
1621 Zero the required rows. If the "diagonal block" of the matrix
1622 is square and the user wishes to set the diagonal we use separate
1623      code so that MatSetValues() is not called for each diagonal entry, which would allocate
1624      new memory each time, causing many mallocs and slowing things down.
1626    */
1627 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1628 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1629 PetscCall(MatHasCongruentLayouts(A, &cong));
1630 if ((diag != 0.0) && cong) {
1631 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1632 } else if (diag != 0.0) {
1633 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1634 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1635 for (r = 0; r < len; ++r) {
1636 const PetscInt row = lrows[r] + A->rmap->rstart;
1637 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1638 }
1639 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1640 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1641 } else {
1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1643 }
1644 PetscCall(PetscFree(lrows));
1645
1646 /* only change matrix nonzero state if pattern was allowed to be changed */
1647 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1648 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1649 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1650 }
1651 PetscFunctionReturn(PETSC_SUCCESS);
1652 }
1653
1654 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1655 {
1656 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1657 PetscMPIInt n, p = 0;
1658 PetscInt i, j, k, r, len = 0, row, col, count;
1659 PetscInt *lrows, *owners = A->rmap->range;
1660 PetscSFNode *rrows;
1661 PetscSF sf;
1662 const PetscScalar *xx;
1663 PetscScalar *bb, *mask;
1664 Vec xmask, lmask;
1665 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data;
1666 PetscInt bs = A->rmap->bs, bs2 = baij->bs2;
1667 PetscScalar *aa;
1668
1669 PetscFunctionBegin;
1670 PetscCall(PetscMPIIntCast(A->rmap->n, &n));
1671 /* Create SF where leaves are input rows and roots are owned rows */
1672 PetscCall(PetscMalloc1(n, &lrows));
1673 for (r = 0; r < n; ++r) lrows[r] = -1;
1674 PetscCall(PetscMalloc1(N, &rrows));
1675 for (r = 0; r < N; ++r) {
1676 const PetscInt idx = rows[r];
1677 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1678 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1679 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1680 }
1681 rrows[r].rank = p;
1682 rrows[r].index = rows[r] - owners[p];
1683 }
1684 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1685 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1686 /* Collect flags for rows to be zeroed */
1687 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1688 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1689 PetscCall(PetscSFDestroy(&sf));
1690 /* Compress and put in row numbers */
1691 for (r = 0; r < n; ++r)
1692 if (lrows[r] >= 0) lrows[len++] = r;
1693 /* zero diagonal part of matrix */
1694 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1695 /* handle off-diagonal part of matrix */
1696 PetscCall(MatCreateVecs(A, &xmask, NULL));
1697 PetscCall(VecDuplicate(l->lvec, &lmask));
1698 PetscCall(VecGetArray(xmask, &bb));
1699 for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1700 PetscCall(VecRestoreArray(xmask, &bb));
1701 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1702 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1703 PetscCall(VecDestroy(&xmask));
1704 if (x) {
1705 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1706 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1707 PetscCall(VecGetArrayRead(l->lvec, &xx));
1708 PetscCall(VecGetArray(b, &bb));
1709 }
1710 PetscCall(VecGetArray(lmask, &mask));
1711 /* remove zeroed rows of off-diagonal matrix */
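  /* each bs*bs block is stored column-major, so the entries of local row (row % bs) are spaced bs apart;
     walk all blocks of this block row and zero that row slice */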
1712 for (i = 0; i < len; ++i) {
1713 row = lrows[i];
1714 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1715 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
1716 for (k = 0; k < count; ++k) {
1717 aa[0] = 0.0;
1718 aa += bs;
1719 }
1720 }
1721   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1722 for (i = 0; i < l->B->rmap->N; ++i) {
1723 row = i / bs;
1724 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1725 for (k = 0; k < bs; ++k) {
1726 col = bs * baij->j[j] + k;
1727 if (PetscAbsScalar(mask[col])) {
1728 aa = baij->a + j * bs2 + (i % bs) + bs * k;
1729 if (x) bb[i] -= aa[0] * xx[col];
1730 aa[0] = 0.0;
1731 }
1732 }
1733 }
1734 }
1735 if (x) {
1736 PetscCall(VecRestoreArray(b, &bb));
1737 PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1738 }
1739 PetscCall(VecRestoreArray(lmask, &mask));
1740 PetscCall(VecDestroy(&lmask));
1741 PetscCall(PetscFree(lrows));
1742
1743 /* only change matrix nonzero state if pattern was allowed to be changed */
1744 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1745 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1746 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1747 }
1748 PetscFunctionReturn(PETSC_SUCCESS);
1749 }
1750
1751 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1752 {
1753 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1754
1755 PetscFunctionBegin;
1756 PetscCall(MatSetUnfactored(a->A));
1757 PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759
1760 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1761
1762 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1763 {
1764 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1765 Mat a, b, c, d;
1766 PetscBool flg;
1767
1768 PetscFunctionBegin;
1769 a = matA->A;
1770 b = matA->B;
1771 c = matB->A;
1772 d = matB->B;
1773
1774 PetscCall(MatEqual(a, c, &flg));
1775 if (flg) PetscCall(MatEqual(b, d, &flg));
1776 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1777 PetscFunctionReturn(PETSC_SUCCESS);
1778 }
1779
1780 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1781 {
1782 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1783 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1784
1785 PetscFunctionBegin;
1786 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1787 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1788 PetscCall(MatCopy_Basic(A, B, str));
1789 } else {
1790 PetscCall(MatCopy(a->A, b->A, str));
1791 PetscCall(MatCopy(a->B, b->B, str));
1792 }
1793 PetscCall(PetscObjectStateIncrease((PetscObject)B));
1794 PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796
1797 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1798 {
1799 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs;
1800 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1801 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1802
1803 PetscFunctionBegin;
1804 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1805 PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807
1808 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1809 {
1810 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1811 PetscBLASInt bnz, one = 1;
1812 Mat_SeqBAIJ *x, *y;
1813 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs;
1814
1815 PetscFunctionBegin;
1816 if (str == SAME_NONZERO_PATTERN) {
1817 PetscScalar alpha = a;
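    /* identical nonzero patterns: the value arrays of X and Y line up entry for entry,
       so a single BLAS axpy over the diagonal part and one over the off-diagonal part suffice */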
1818 x = (Mat_SeqBAIJ *)xx->A->data;
1819 y = (Mat_SeqBAIJ *)yy->A->data;
1820 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1821 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1822 x = (Mat_SeqBAIJ *)xx->B->data;
1823 y = (Mat_SeqBAIJ *)yy->B->data;
1824 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1825 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1826 PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1827 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1828 PetscCall(MatAXPY_Basic(Y, a, X, str));
1829 } else {
1830 Mat B;
1831 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1832 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1833 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1834 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1835 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1836 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1837 PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1838 PetscCall(MatSetType(B, MATMPIBAIJ));
1839 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1840 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1841 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1842 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1843 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1844 PetscCall(MatHeaderMerge(Y, &B));
1845 PetscCall(PetscFree(nnz_d));
1846 PetscCall(PetscFree(nnz_o));
1847 }
1848 PetscFunctionReturn(PETSC_SUCCESS);
1849 }
1850
1851 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1852 {
1853 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1854
1855 PetscFunctionBegin;
1856 PetscCall(MatConjugate_SeqBAIJ(a->A));
1857 PetscCall(MatConjugate_SeqBAIJ(a->B));
1858 PetscFunctionReturn(PETSC_SUCCESS);
1859 }
1860
1861 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1862 {
1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1864
1865 PetscFunctionBegin;
1866 PetscCall(MatRealPart(a->A));
1867 PetscCall(MatRealPart(a->B));
1868 PetscFunctionReturn(PETSC_SUCCESS);
1869 }
1870
1871 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1872 {
1873 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1874
1875 PetscFunctionBegin;
1876 PetscCall(MatImaginaryPart(a->A));
1877 PetscCall(MatImaginaryPart(a->B));
1878 PetscFunctionReturn(PETSC_SUCCESS);
1879 }
1880
1881 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1882 {
1883 IS iscol_local;
1884 PetscInt csize;
1885
1886 PetscFunctionBegin;
1887 PetscCall(ISGetLocalSize(iscol, &csize));
1888 if (call == MAT_REUSE_MATRIX) {
1889 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1890 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1891 } else {
1892 PetscCall(ISAllGather(iscol, &iscol_local));
1893 }
1894 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1895 if (call == MAT_INITIAL_MATRIX) {
1896 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1897 PetscCall(ISDestroy(&iscol_local));
1898 }
1899 PetscFunctionReturn(PETSC_SUCCESS);
1900 }
1901
1902 /*
1903      Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
1904      and then the final result formed by concatenating the local matrices.
1905      Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1906      This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
1907 */
1908 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1909 {
1910 PetscMPIInt rank, size;
1911 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1912 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1913 Mat M, Mreuse;
1914 MatScalar *vwork, *aa;
1915 MPI_Comm comm;
1916 IS isrow_new, iscol_new;
1917 Mat_SeqBAIJ *aij;
1918
1919 PetscFunctionBegin;
1920 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1921 PetscCallMPI(MPI_Comm_rank(comm, &rank));
1922 PetscCallMPI(MPI_Comm_size(comm, &size));
1923   /* The compression and expansion should be avoided. They do not report
1924      errors and might change the indices, hence this is buggy */
1925 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1926 if (isrow == iscol) {
1927 iscol_new = isrow_new;
1928 PetscCall(PetscObjectReference((PetscObject)iscol_new));
1929 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1930
1931 if (call == MAT_REUSE_MATRIX) {
1932 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1933 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1934 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1935 } else {
1936 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1937 }
1938 PetscCall(ISDestroy(&isrow_new));
1939 PetscCall(ISDestroy(&iscol_new));
1940 /*
1941 m - number of local rows
1942 n - number of columns (same on all processors)
1943 rstart - first row in new global matrix generated
1944 */
1945 PetscCall(MatGetBlockSize(mat, &bs));
1946 PetscCall(MatGetSize(Mreuse, &m, &n));
1947 m = m / bs;
1948 n = n / bs;
1949
1950 if (call == MAT_INITIAL_MATRIX) {
1951 aij = (Mat_SeqBAIJ *)Mreuse->data;
1952 ii = aij->i;
1953 jj = aij->j;
1954
1955 /*
1956 Determine the number of non-zeros in the diagonal and off-diagonal
1957 portions of the matrix in order to do correct preallocation
1958 */
1959
1960 /* first get start and end of "diagonal" columns */
1961 if (csize == PETSC_DECIDE) {
1962 PetscCall(ISGetSize(isrow, &mglobal));
1963 if (mglobal == n * bs) { /* square matrix */
1964 nlocal = m;
1965 } else {
1966 nlocal = n / size + ((n % size) > rank);
1967 }
1968 } else {
1969 nlocal = csize / bs;
1970 }
1971 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1972 rstart = rend - nlocal;
1973 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1974
1975 /* next, compute all the lengths */
1976 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1977 for (i = 0; i < m; i++) {
1978 jend = ii[i + 1] - ii[i];
1979 olen = 0;
1980 dlen = 0;
1981 for (j = 0; j < jend; j++) {
1982 if (*jj < rstart || *jj >= rend) olen++;
1983 else dlen++;
1984 jj++;
1985 }
1986 olens[i] = olen;
1987 dlens[i] = dlen;
1988 }
1989 PetscCall(MatCreate(comm, &M));
1990 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
1991 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
1992 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1993 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
1994 PetscCall(PetscFree2(dlens, olens));
1995 } else {
1996 PetscInt ml, nl;
1997
1998 M = *newmat;
1999 PetscCall(MatGetLocalSize(M, &ml, &nl));
2000 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2001 PetscCall(MatZeroEntries(M));
2002 /*
2003     The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2004 rather than the slower MatSetValues().
2005 */
2006 M->was_assembled = PETSC_TRUE;
2007 M->assembled = PETSC_FALSE;
2008 }
2009 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
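  /* the raw SeqBAIJ block values taken from Mreuse below are stored column-major within each block,
     hence the column-oriented insertion mode */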
2010 PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2011 aij = (Mat_SeqBAIJ *)Mreuse->data;
2012 ii = aij->i;
2013 jj = aij->j;
2014 aa = aij->a;
2015 for (i = 0; i < m; i++) {
2016 row = rstart / bs + i;
2017 nz = ii[i + 1] - ii[i];
2018 cwork = jj;
2019 jj = PetscSafePointerPlusOffset(jj, nz);
2020 vwork = aa;
2021 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2022 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2023 }
2024
2025 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2026 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2027 *newmat = M;
2028
2029   /* save the submatrix used on this process for the next (reuse) request */
2030 if (call == MAT_INITIAL_MATRIX) {
2031 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2032 PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2033 }
2034 PetscFunctionReturn(PETSC_SUCCESS);
2035 }
2036
2037 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2038 {
2039 MPI_Comm comm, pcomm;
2040 PetscInt clocal_size, nrows;
2041 const PetscInt *rows;
2042 PetscMPIInt size;
2043 IS crowp, lcolp;
2044
2045 PetscFunctionBegin;
2046 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2047 /* make a collective version of 'rowp' */
2048 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2049 if (pcomm == comm) {
2050 crowp = rowp;
2051 } else {
2052 PetscCall(ISGetSize(rowp, &nrows));
2053 PetscCall(ISGetIndices(rowp, &rows));
2054 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2055 PetscCall(ISRestoreIndices(rowp, &rows));
2056 }
2057 PetscCall(ISSetPermutation(crowp));
2058 /* make a local version of 'colp' */
2059 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2060 PetscCallMPI(MPI_Comm_size(pcomm, &size));
2061 if (size == 1) {
2062 lcolp = colp;
2063 } else {
2064 PetscCall(ISAllGather(colp, &lcolp));
2065 }
2066 PetscCall(ISSetPermutation(lcolp));
2067 /* now we just get the submatrix */
2068 PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2069 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2070 /* clean up */
2071 if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2072 if (size > 1) PetscCall(ISDestroy(&lcolp));
2073 PetscFunctionReturn(PETSC_SUCCESS);
2074 }
2075
2076 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2077 {
2078 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2079 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data;
2080
2081 PetscFunctionBegin;
2082 if (nghosts) *nghosts = B->nbs;
2083 if (ghosts) *ghosts = baij->garray;
2084 PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086
2087 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2088 {
2089 Mat B;
2090 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2091 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2092 Mat_SeqAIJ *b;
2093 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL;
2094 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2095 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2096
2097 PetscFunctionBegin;
2098 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2099 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2100
2101 /* Tell every processor the number of nonzeros per row */
2102 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2103 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2104 PetscCall(PetscMalloc1(2 * size, &recvcounts));
2105 displs = recvcounts + size;
2106 for (i = 0; i < size; i++) {
2107 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
2108 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
2109 }
2110 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2111 /* Create the sequential matrix of the same type as the local block diagonal */
2112 PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2113 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2114 PetscCall(MatSetType(B, MATSEQAIJ));
2115 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2116 b = (Mat_SeqAIJ *)B->data;
2117
2118 /* Copy my part of matrix column indices over */
2119 sendcount = ad->nz + bd->nz;
2120 jsendbuf = b->j + b->i[rstarts[rank] / bs];
2121 a_jsendbuf = ad->j;
2122 b_jsendbuf = bd->j;
2123 n = A->rmap->rend / bs - A->rmap->rstart / bs;
2124 cnt = 0;
2125 for (i = 0; i < n; i++) {
2126 /* put in lower diagonal portion */
2127 m = bd->i[i + 1] - bd->i[i];
2128 while (m > 0) {
2129 /* is it above diagonal (in bd (compressed) numbering) */
2130 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2131 jsendbuf[cnt++] = garray[*b_jsendbuf++];
2132 m--;
2133 }
2134
2135 /* put in diagonal portion */
2136 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2137
2138 /* put in upper diagonal portion */
2139 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2140 }
2141 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2142
2143 /* Gather all column indices to all processors */
2144 for (i = 0; i < size; i++) {
2145 recvcounts[i] = 0;
2146 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2147 }
2148 displs[0] = 0;
2149 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2150 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2151 /* Assemble the matrix into usable form (note numerical values not yet set) */
2152 /* set the b->ilen (length of each row) values */
2153 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2154 /* set the b->i indices */
2155 b->i[0] = 0;
2156 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2157 PetscCall(PetscFree(lens));
2158 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2159 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2160 PetscCall(PetscFree(recvcounts));
2161
2162 PetscCall(MatPropagateSymmetryOptions(A, B));
2163 *newmat = B;
2164 PetscFunctionReturn(PETSC_SUCCESS);
2165 }
2166
2167 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2168 {
2169 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2170 Vec bb1 = NULL;
2171
2172 PetscFunctionBegin;
2173 if (flag == SOR_APPLY_UPPER) {
2174 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2175 PetscFunctionReturn(PETSC_SUCCESS);
2176 }
2177
2178 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
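  /* bb1 holds the locally updated right-hand side bb - B*x; it is only needed when more than one
     sweep is performed or the initial guess is nonzero */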
2179
2180 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2181 if (flag & SOR_ZERO_INITIAL_GUESS) {
2182 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2183 its--;
2184 }
2185
2186 while (its--) {
2187 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2188 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2189
2190 /* update rhs: bb1 = bb - B*x */
2191 PetscCall(VecScale(mat->lvec, -1.0));
2192 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2193
2194 /* local sweep */
2195 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2196 }
2197 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2198 if (flag & SOR_ZERO_INITIAL_GUESS) {
2199 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2200 its--;
2201 }
2202 while (its--) {
2203 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2204 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2205
2206 /* update rhs: bb1 = bb - B*x */
2207 PetscCall(VecScale(mat->lvec, -1.0));
2208 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2209
2210 /* local sweep */
2211 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2212 }
2213 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2214 if (flag & SOR_ZERO_INITIAL_GUESS) {
2215 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2216 its--;
2217 }
2218 while (its--) {
2219 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2220 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2221
2222 /* update rhs: bb1 = bb - B*x */
2223 PetscCall(VecScale(mat->lvec, -1.0));
2224 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2225
2226 /* local sweep */
2227 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2228 }
2229 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2230
2231 PetscCall(VecDestroy(&bb1));
2232 PetscFunctionReturn(PETSC_SUCCESS);
2233 }
2234
2235 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2236 {
2237 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2238 PetscInt m, N, i, *garray = aij->garray;
2239 PetscInt ib, jb, bs = A->rmap->bs;
2240 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2241 MatScalar *a_val = a_aij->a;
2242 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2243 MatScalar *b_val = b_aij->a;
2244 PetscReal *work;
2245
2246 PetscFunctionBegin;
2247 PetscCall(MatGetSize(A, &m, &N));
2248 PetscCall(PetscCalloc1(N, &work));
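  /* work[] accumulates one reduction value per global column: diagonal-block entries map to column
     A->cmap->rstart + a_aij->j[i] * bs + jb, off-diagonal entries map through garray[] */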
2249 if (type == NORM_2) {
2250 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2251 for (jb = 0; jb < bs; jb++) {
2252 for (ib = 0; ib < bs; ib++) {
2253 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2254 a_val++;
2255 }
2256 }
2257 }
2258 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2259 for (jb = 0; jb < bs; jb++) {
2260 for (ib = 0; ib < bs; ib++) {
2261 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2262 b_val++;
2263 }
2264 }
2265 }
2266 } else if (type == NORM_1) {
2267 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2268 for (jb = 0; jb < bs; jb++) {
2269 for (ib = 0; ib < bs; ib++) {
2270 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2271 a_val++;
2272 }
2273 }
2274 }
2275 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2276 for (jb = 0; jb < bs; jb++) {
2277 for (ib = 0; ib < bs; ib++) {
2278 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2279 b_val++;
2280 }
2281 }
2282 }
2283 } else if (type == NORM_INFINITY) {
2284 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2285 for (jb = 0; jb < bs; jb++) {
2286 for (ib = 0; ib < bs; ib++) {
2287 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2288 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2289 a_val++;
2290 }
2291 }
2292 }
2293 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2294 for (jb = 0; jb < bs; jb++) {
2295 for (ib = 0; ib < bs; ib++) {
2296 PetscInt col = garray[b_aij->j[i]] * bs + jb;
2297 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2298 b_val++;
2299 }
2300 }
2301 }
2302 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2303 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2304 for (jb = 0; jb < bs; jb++) {
2305 for (ib = 0; ib < bs; ib++) {
2306 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2307 a_val++;
2308 }
2309 }
2310 }
2311 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2312 for (jb = 0; jb < bs; jb++) {
2313 for (ib = 0; ib < bs; ib++) {
2314 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2315 b_val++;
2316 }
2317 }
2318 }
2319 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2320 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2321 for (jb = 0; jb < bs; jb++) {
2322 for (ib = 0; ib < bs; ib++) {
2323 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2324 a_val++;
2325 }
2326 }
2327 }
2328 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2329 for (jb = 0; jb < bs; jb++) {
2330 for (ib = 0; ib < bs; ib++) {
2331 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2332 b_val++;
2333 }
2334 }
2335 }
2336 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2337 if (type == NORM_INFINITY) {
2338 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2339 } else {
2340 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2341 }
2342 PetscCall(PetscFree(work));
2343 if (type == NORM_2) {
2344 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2345 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2346 for (i = 0; i < N; i++) reductions[i] /= m;
2347 }
2348 PetscFunctionReturn(PETSC_SUCCESS);
2349 }
2350
2351 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2352 {
2353 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2354
2355 PetscFunctionBegin;
2356 PetscCall(MatInvertBlockDiagonal(a->A, values));
2357 A->factorerrortype = a->A->factorerrortype;
2358 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2359 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row;
2360 PetscFunctionReturn(PETSC_SUCCESS);
2361 }
2362
2363 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2364 {
2365 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2366 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data;
2367
2368 PetscFunctionBegin;
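  /* ensure the diagonal block is preallocated (at least one block per block row) before
     MatShift_Basic() inserts the diagonal shift */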
2369 if (!Y->preallocated) {
2370 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2371 } else if (!aij->nz) {
2372 PetscInt nonew = aij->nonew;
2373 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2374 aij->nonew = nonew;
2375 }
2376 PetscCall(MatShift_Basic(Y, a));
2377 PetscFunctionReturn(PETSC_SUCCESS);
2378 }
2379
2380 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2381 {
2382 PetscFunctionBegin;
2383 *a = ((Mat_MPIBAIJ *)A->data)->A;
2384 PetscFunctionReturn(PETSC_SUCCESS);
2385 }
2386
2387 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2388 {
2389 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2390
2391 PetscFunctionBegin;
2392 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
2393 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2394 PetscFunctionReturn(PETSC_SUCCESS);
2395 }
2396
2397 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2398 MatGetRow_MPIBAIJ,
2399 MatRestoreRow_MPIBAIJ,
2400 MatMult_MPIBAIJ,
2401 /* 4*/ MatMultAdd_MPIBAIJ,
2402 MatMultTranspose_MPIBAIJ,
2403 MatMultTransposeAdd_MPIBAIJ,
2404 NULL,
2405 NULL,
2406 NULL,
2407 /*10*/ NULL,
2408 NULL,
2409 NULL,
2410 MatSOR_MPIBAIJ,
2411 MatTranspose_MPIBAIJ,
2412 /*15*/ MatGetInfo_MPIBAIJ,
2413 MatEqual_MPIBAIJ,
2414 MatGetDiagonal_MPIBAIJ,
2415 MatDiagonalScale_MPIBAIJ,
2416 MatNorm_MPIBAIJ,
2417 /*20*/ MatAssemblyBegin_MPIBAIJ,
2418 MatAssemblyEnd_MPIBAIJ,
2419 MatSetOption_MPIBAIJ,
2420 MatZeroEntries_MPIBAIJ,
2421 /*24*/ MatZeroRows_MPIBAIJ,
2422 NULL,
2423 NULL,
2424 NULL,
2425 NULL,
2426 /*29*/ MatSetUp_MPI_Hash,
2427 NULL,
2428 NULL,
2429 MatGetDiagonalBlock_MPIBAIJ,
2430 NULL,
2431 /*34*/ MatDuplicate_MPIBAIJ,
2432 NULL,
2433 NULL,
2434 NULL,
2435 NULL,
2436 /*39*/ MatAXPY_MPIBAIJ,
2437 MatCreateSubMatrices_MPIBAIJ,
2438 MatIncreaseOverlap_MPIBAIJ,
2439 MatGetValues_MPIBAIJ,
2440 MatCopy_MPIBAIJ,
2441 /*44*/ NULL,
2442 MatScale_MPIBAIJ,
2443 MatShift_MPIBAIJ,
2444 NULL,
2445 MatZeroRowsColumns_MPIBAIJ,
2446 /*49*/ NULL,
2447 NULL,
2448 NULL,
2449 NULL,
2450 NULL,
2451 /*54*/ MatFDColoringCreate_MPIXAIJ,
2452 NULL,
2453 MatSetUnfactored_MPIBAIJ,
2454 MatPermute_MPIBAIJ,
2455 MatSetValuesBlocked_MPIBAIJ,
2456 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2457 MatDestroy_MPIBAIJ,
2458 MatView_MPIBAIJ,
2459 NULL,
2460 NULL,
2461 /*64*/ NULL,
2462 NULL,
2463 NULL,
2464 NULL,
2465 MatGetRowMaxAbs_MPIBAIJ,
2466 /*69*/ NULL,
2467 NULL,
2468 NULL,
2469 MatFDColoringApply_BAIJ,
2470 NULL,
2471 /*74*/ NULL,
2472 NULL,
2473 NULL,
2474 NULL,
2475 MatLoad_MPIBAIJ,
2476 /*79*/ NULL,
2477 NULL,
2478 NULL,
2479 NULL,
2480 NULL,
2481 /*84*/ NULL,
2482 NULL,
2483 NULL,
2484 NULL,
2485 NULL,
2486 /*89*/ NULL,
2487 NULL,
2488 NULL,
2489 NULL,
2490 MatConjugate_MPIBAIJ,
2491 /*94*/ NULL,
2492 NULL,
2493 MatRealPart_MPIBAIJ,
2494 MatImaginaryPart_MPIBAIJ,
2495 NULL,
2496 /*99*/ NULL,
2497 NULL,
2498 NULL,
2499 NULL,
2500 NULL,
2501 /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2502 NULL,
2503 MatGetGhosts_MPIBAIJ,
2504 NULL,
2505 NULL,
2506 /*109*/ NULL,
2507 NULL,
2508 NULL,
2509 NULL,
2510 MatGetMultiProcBlock_MPIBAIJ,
2511 /*114*/ NULL,
2512 MatGetColumnReductions_MPIBAIJ,
2513 MatInvertBlockDiagonal_MPIBAIJ,
2514 NULL,
2515 NULL,
2516 /*119*/ NULL,
2517 NULL,
2518 NULL,
2519 NULL,
2520 NULL,
2521 /*124*/ NULL,
2522 NULL,
2523 MatSetBlockSizes_Default,
2524 NULL,
2525 MatFDColoringSetUp_MPIXAIJ,
2526 /*129*/ NULL,
2527 MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2528 NULL,
2529 NULL,
2530 NULL,
2531 /*134*/ NULL,
2532 NULL,
2533 MatEliminateZeros_MPIBAIJ,
2534 MatGetRowSumAbs_MPIBAIJ,
2535 NULL,
2536 /*139*/ NULL,
2537 NULL,
2538 MatCopyHashToXAIJ_MPI_Hash,
2539 NULL,
2540 NULL};
2541
2542 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2543 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2544
2545 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2546 {
2547 PetscInt m, rstart, cstart, cend;
2548 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2549 const PetscInt *JJ = NULL;
2550 PetscScalar *values = NULL;
2551 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2552 PetscBool nooffprocentries;
2553
2554 PetscFunctionBegin;
2555 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2556 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2557 PetscCall(PetscLayoutSetUp(B->rmap));
2558 PetscCall(PetscLayoutSetUp(B->cmap));
2559 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2560 m = B->rmap->n / bs;
2561 rstart = B->rmap->rstart / bs;
2562 cstart = B->cmap->rstart / bs;
2563 cend = B->cmap->rend / bs;
2564
2565 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2566 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
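  /* count, for each local block row, how many blocks fall in the diagonal portion [cstart, cend)
     versus the off-diagonal portion, so the preallocation below is exact */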
2567 for (i = 0; i < m; i++) {
2568 nz = ii[i + 1] - ii[i];
2569 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2570 nz_max = PetscMax(nz_max, nz);
2571 dlen = 0;
2572 olen = 0;
2573 JJ = jj + ii[i];
2574 for (j = 0; j < nz; j++) {
2575 if (*JJ < cstart || *JJ >= cend) olen++;
2576 else dlen++;
2577 JJ++;
2578 }
2579 d_nnz[i] = dlen;
2580 o_nnz[i] = olen;
2581 }
2582 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2583 PetscCall(PetscFree2(d_nnz, o_nnz));
2584
2585 values = (PetscScalar *)V;
2586 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2587 for (i = 0; i < m; i++) {
2588 PetscInt row = i + rstart;
2589 PetscInt ncols = ii[i + 1] - ii[i];
2590 const PetscInt *icols = jj + ii[i];
2591 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2592 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2593 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2594 } else { /* block ordering does not match so we can only insert one block at a time. */
2595 PetscInt j;
2596 for (j = 0; j < ncols; j++) {
2597 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2598 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2599 }
2600 }
2601 }
2602
2603 if (!V) PetscCall(PetscFree(values));
2604 nooffprocentries = B->nooffprocentries;
2605 B->nooffprocentries = PETSC_TRUE;
2606 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2607 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2608 B->nooffprocentries = nooffprocentries;
2609
2610 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2611 PetscFunctionReturn(PETSC_SUCCESS);
2612 }
2613
2614 /*@C
2615 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2616
2617 Collective
2618
2619 Input Parameters:
2620 + B - the matrix
2621 . bs - the block size
2622 . i - the indices into `j` for the start of each local row (starts with zero)
2623 . j - the column indices for each local row (starts with zero) these must be sorted for each row
2624 - v - optional values in the matrix, use `NULL` if not provided
2625
2626 Level: advanced
2627
2628 Notes:
2629 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2630 thus you CANNOT change the matrix entries by changing the values of `v` after you have
2631 called this routine.
2632
2633 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
2634 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2635 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
2636 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2637 block column and the second index is over columns within a block.
2638
2639    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
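
   A minimal illustrative sketch of typical usage (the block size, arrays, and sizes below are made up
   for illustration and are not taken from an existing example). Each rank describes two local block rows,
   with blocks at global block columns 0 and 1, and defers the numerical values by passing NULL for v:
.vb
   PetscInt bs   = 2;
   PetscInt ii[] = {0, 1, 3}; // block row 0 has 1 block, block row 1 has 2 blocks
   PetscInt jj[] = {0, 0, 1}; // global block column indices, sorted within each block row
   Mat      B;

   MatCreate(PETSC_COMM_WORLD, &B);
   MatSetSizes(B, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(B, MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B, bs, ii, jj, NULL);
.ve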
2640
2641 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2642 @*/
2643 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2644 {
2645 PetscFunctionBegin;
2646 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2647 PetscValidType(B, 1);
2648 PetscValidLogicalCollectiveInt(B, bs, 2);
2649 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2650 PetscFunctionReturn(PETSC_SUCCESS);
2651 }
2652
2653 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2654 {
2655 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2656 PetscInt i;
2657 PetscMPIInt size;
2658
2659 PetscFunctionBegin;
2660 if (B->hash_active) {
2661 B->ops[0] = b->cops;
2662 B->hash_active = PETSC_FALSE;
2663 }
2664 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2665 PetscCall(MatSetBlockSize(B, bs));
2666 PetscCall(PetscLayoutSetUp(B->rmap));
2667 PetscCall(PetscLayoutSetUp(B->cmap));
2668 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2669
2670 if (d_nnz) {
2671     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2672 }
2673 if (o_nnz) {
2674     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2675 }
2676
2677 b->bs2 = bs * bs;
2678 b->mbs = B->rmap->n / bs;
2679 b->nbs = B->cmap->n / bs;
2680 b->Mbs = B->rmap->N / bs;
2681 b->Nbs = B->cmap->N / bs;
2682
2683 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2684 b->rstartbs = B->rmap->rstart / bs;
2685 b->rendbs = B->rmap->rend / bs;
2686 b->cstartbs = B->cmap->rstart / bs;
2687 b->cendbs = B->cmap->rend / bs;
2688
2689 #if defined(PETSC_USE_CTABLE)
2690 PetscCall(PetscHMapIDestroy(&b->colmap));
2691 #else
2692 PetscCall(PetscFree(b->colmap));
2693 #endif
2694 PetscCall(PetscFree(b->garray));
2695 PetscCall(VecDestroy(&b->lvec));
2696 PetscCall(VecScatterDestroy(&b->Mvctx));
2697
2698 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2699
2700 MatSeqXAIJGetOptions_Private(b->B);
2701 PetscCall(MatDestroy(&b->B));
2702 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2703 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2704 PetscCall(MatSetType(b->B, MATSEQBAIJ));
2705 MatSeqXAIJRestoreOptions_Private(b->B);
2706
2707 MatSeqXAIJGetOptions_Private(b->A);
2708 PetscCall(MatDestroy(&b->A));
2709 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2710 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2711 PetscCall(MatSetType(b->A, MATSEQBAIJ));
2712 MatSeqXAIJRestoreOptions_Private(b->A);
2713
2714 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2715 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2716 B->preallocated = PETSC_TRUE;
2717 B->was_assembled = PETSC_FALSE;
2718 B->assembled = PETSC_FALSE;
2719 PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721
2722 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2723 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2724
2725 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2726 {
2727 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2728 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2729 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2730 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2731
2732 PetscFunctionBegin;
2733 PetscCall(PetscMalloc1(M + 1, &ii));
2734 ii[0] = 0;
2735 for (i = 0; i < M; i++) {
2736 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2737 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2738 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2739     /* remove one from the count if this row of the matrix has a diagonal entry */
2740 for (j = id[i]; j < id[i + 1]; j++) {
2741 if (jd[j] == i) {
2742 ii[i + 1]--;
2743 break;
2744 }
2745 }
2746 }
2747 PetscCall(PetscMalloc1(ii[M], &jj));
2748 cnt = 0;
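  /* merge each row in global block-column order: off-diagonal blocks left of the diagonal block,
     then the local (diagonal-block) columns with the diagonal entry itself skipped, then the
     remaining off-diagonal blocks */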
2749 for (i = 0; i < M; i++) {
2750 for (j = io[i]; j < io[i + 1]; j++) {
2751 if (garray[jo[j]] > rstart) break;
2752 jj[cnt++] = garray[jo[j]];
2753 }
2754 for (k = id[i]; k < id[i + 1]; k++) {
2755 if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2756 }
2757 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2758 }
2759 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2760 PetscFunctionReturn(PETSC_SUCCESS);
2761 }
2762
2763 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2764
2765 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2766
2767 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2768 {
2769 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2770 Mat_MPIAIJ *b;
2771 Mat B;
2772
2773 PetscFunctionBegin;
2774 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2775
2776 if (reuse == MAT_REUSE_MATRIX) {
2777 B = *newmat;
2778 } else {
2779 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2780 PetscCall(MatSetType(B, MATMPIAIJ));
2781 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2782 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2783 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2784 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2785 }
2786 b = (Mat_MPIAIJ *)B->data;
2787
2788 if (reuse == MAT_REUSE_MATRIX) {
2789 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2790 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2791 } else {
2792 PetscInt *garray = a->garray;
2793 Mat_SeqAIJ *bB;
2794 PetscInt bs, nnz;
2795 PetscCall(MatDestroy(&b->A));
2796 PetscCall(MatDestroy(&b->B));
2797 /* just clear out the data structure */
2798 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
2799 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2800 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2801
2802 /* Global numbering for b->B columns */
2803 bB = (Mat_SeqAIJ *)b->B->data;
2804 bs = A->rmap->bs;
2805 nnz = bB->i[A->rmap->n];
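    /* MatConvert_SeqBAIJ_SeqAIJ() leaves b->B with local (compressed) block column numbering;
       translate each column index to its global value through garray[] */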
2806 for (PetscInt k = 0; k < nnz; k++) {
2807 PetscInt bj = bB->j[k] / bs;
2808 PetscInt br = bB->j[k] % bs;
2809 bB->j[k] = garray[bj] * bs + br;
2810 }
2811 }
2812 PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2813 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2814 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2815 PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
2816
2817 if (reuse == MAT_INPLACE_MATRIX) {
2818 PetscCall(MatHeaderReplace(A, &B));
2819 } else {
2820 *newmat = B;
2821 }
2822 PetscFunctionReturn(PETSC_SUCCESS);
2823 }
2824
2825 /*MC
2826 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2827
2828 Options Database Keys:
2829 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2830 . -mat_block_size <bs> - set the blocksize used to store the matrix
2831 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2832 - -mat_use_hash_table <fact> - set hash table factor
2833
2834 Level: beginner
2835
2836 Note:
2837    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2838    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2839
2840 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2841 M*/
2842
2843 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2844
2845 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2846 {
2847 Mat_MPIBAIJ *b;
2848 PetscBool flg = PETSC_FALSE;
2849
2850 PetscFunctionBegin;
2851 PetscCall(PetscNew(&b));
2852 B->data = (void *)b;
2853 B->ops[0] = MatOps_Values;
2854 B->assembled = PETSC_FALSE;
2855
2856 B->insertmode = NOT_SET_VALUES;
2857 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2858 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2859
2860 /* build local table of row and column ownerships */
2861 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2862
2863   /* build cache (stash) for entries destined for other processes */
2864 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2865
2866 b->donotstash = PETSC_FALSE;
2867 b->colmap = NULL;
2868 b->garray = NULL;
2869 b->roworiented = PETSC_TRUE;
2870
2871 /* stuff used in block assembly */
2872 b->barray = NULL;
2873
2874 /* stuff used for matrix vector multiply */
2875 b->lvec = NULL;
2876 b->Mvctx = NULL;
2877
2878 /* stuff for MatGetRow() */
2879 b->rowindices = NULL;
2880 b->rowvalues = NULL;
2881 b->getrowactive = PETSC_FALSE;
2882
2883 /* hash table stuff */
2884 b->ht = NULL;
2885 b->hd = NULL;
2886 b->ht_size = 0;
2887 b->ht_flag = PETSC_FALSE;
2888 b->ht_fact = 0;
2889 b->ht_total_ct = 0;
2890 b->ht_insert_ct = 0;
2891
2892 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2893 b->ijonly = PETSC_FALSE;
2894
2895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2898 #if defined(PETSC_HAVE_HYPRE)
2899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2900 #endif
2901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2908 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2909
2910 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2911 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2912 if (flg) {
2913 PetscReal fact = 1.39;
2914 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2915 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2916 if (fact <= 1.0) fact = 1.39;
2917 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2918 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2919 }
2920 PetscOptionsEnd();
2921 PetscFunctionReturn(PETSC_SUCCESS);
2922 }
2923
2924 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2925 /*MC
2926 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2927
2928 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2929 and `MATMPIBAIJ` otherwise.
2930
2931 Options Database Keys:
2932 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2933
2934 Level: beginner
2935
2936 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2937 M*/
2938
2939 /*@
2940 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2941 (block compressed row).
2942
2943 Collective
2944
2945 Input Parameters:
2946 + B - the matrix
2947 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2948 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2949 . d_nz - number of block nonzeros per block row in diagonal portion of local
2950 submatrix (same for all local rows)
2951 . d_nnz - array containing the number of block nonzeros in the various block rows
2952 of the diagonal portion of the local submatrix (possibly different for each block
2953 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
2954 set it even if it is zero.
2955 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2956 submatrix (same for all local rows).
2957 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2958 off-diagonal portion of the local submatrix (possibly different for
2959 each block row) or `NULL`.
2960
2961 If the *_nnz parameter is given then the *_nz parameter is ignored
2962
2963 Options Database Keys:
2964 + -mat_block_size - size of the blocks to use
2965 - -mat_use_hash_table <fact> - set hash table factor
2966
2967 Level: intermediate
2968
2969 Notes:
2970 For good matrix assembly performance
2971 the user should preallocate the matrix storage by setting the parameters
2972 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
2973 performance can be increased by more than a factor of 50.
2974
2975 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2976 then it must be used on all processors that share the object for that argument.
2977
2978 Storage Information:
2979 For a square global matrix we define each processor's diagonal portion
2980 to be its local rows and the corresponding columns (a square submatrix);
2981 each processor's off-diagonal portion encompasses the remainder of the
2982 local matrix (a rectangular submatrix).
2983
2984 The user can specify preallocated storage for the diagonal part of
2985 the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2986 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2987 memory allocation. Likewise, specify preallocated storage for the
2988 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2989
2990 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2991 the figure below we depict these three local rows and all columns (0-11).
2992
2993 .vb
2994 0 1 2 3 4 5 6 7 8 9 10 11
2995 --------------------------
2996 row 3 |o o o d d d o o o o o o
2997 row 4 |o o o d d d o o o o o o
2998 row 5 |o o o d d d o o o o o o
2999 --------------------------
3000 .ve
3001
3002 Thus, any entries in the d locations are stored in the d (diagonal)
3003 submatrix, and any entries in the o locations are stored in the
3004 o (off-diagonal) submatrix. Note that the d and the o submatrices are
3005 stored simply in the `MATSEQBAIJ` format for compressed row storage.
3006
3007 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3008 and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3009 In general, for PDE problems in which most nonzeros are near the diagonal,
3010 one expects `d_nz` >> `o_nz`.
3011
3012 You can call `MatGetInfo()` to get information on how effective the preallocation was;
3013 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3014 You can also run with the option `-info` and look for messages with the string
3015 malloc in them to see if additional memory allocation was needed.
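
  Example Usage:
  A minimal sketch of per-block-row preallocation for three local block rows (the counts are illustrative):
.vb
  PetscInt d_nnz[] = {3, 2, 4};   /* block nonzeros per block row, diagonal part     */
  PetscInt o_nnz[] = {1, 0, 2};   /* block nonzeros per block row, off-diagonal part */
  PetscCall(MatMPIBAIJSetPreallocation(B, 2, 0, d_nnz, 0, o_nnz));
.ve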
3016
3017 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3018 @*/
3019 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3020 {
3021 PetscFunctionBegin;
3022 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3023 PetscValidType(B, 1);
3024 PetscValidLogicalCollectiveInt(B, bs, 2);
3025 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3026 PetscFunctionReturn(PETSC_SUCCESS);
3027 }
3028
3029 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3030 /*@
3031 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3032 (block compressed row).
3033
3034 Collective
3035
3036 Input Parameters:
3037 + comm - MPI communicator
3038 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3039 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3040 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3041 This value should be the same as the local size used in creating the
3042 y vector for the matrix-vector product y = Ax.
3043 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3044 This value should be the same as the local size used in creating the
3045 x vector for the matrix-vector product y = Ax.
3046 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3047 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3048 . d_nz - number of nonzero blocks per block row in diagonal portion of local
3049 submatrix (same for all local rows)
3050 . d_nnz - array containing the number of nonzero blocks in the various block rows
3051 of the diagonal portion of the local submatrix (possibly different for each block
3052 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3053 and set it even if it is zero.
3054 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3055 submatrix (same for all local rows).
3056 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3057 off-diagonal portion of the local submatrix (possibly different for
3058 each block row) or NULL.
3059
3060 Output Parameter:
3061 . A - the matrix
3062
3063 Options Database Keys:
3064 + -mat_block_size - size of the blocks to use
3065 - -mat_use_hash_table <fact> - set hash table factor
3066
3067 Level: intermediate
3068
3069 Notes:
3070 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3071 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3072 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3073
3074 For good matrix assembly performance
3075 the user should preallocate the matrix storage by setting the parameters
3076 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
3077 performance can be increased by more than a factor of 50.
3078
3079 If the *_nnz parameter is given then the *_nz parameter is ignored
3080
3081 A nonzero block is any block that has 1 or more nonzeros in it
3082
3083 The user MUST specify either the local or global matrix dimensions
3084 (possibly both).
3085
3086 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3087 then it must be used on all processors that share the object for that argument.
3088
3089 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3090 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3091
3092 Storage Information:
3093 For a square global matrix we define each processor's diagonal portion
3094 to be its local rows and the corresponding columns (a square submatrix);
3095 each processor's off-diagonal portion encompasses the remainder of the
3096 local matrix (a rectangular submatrix).
3097
3098 The user can specify preallocated storage for the diagonal part of
3099 the local submatrix with either `d_nz` or `d_nnz` (not both). Set
3100 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3101 memory allocation. Likewise, specify preallocated storage for the
3102 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3103
3104 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3105 the figure below we depict these three local rows and all columns (0-11).
3106
3107 .vb
3108 0 1 2 3 4 5 6 7 8 9 10 11
3109 --------------------------
3110 row 3 |o o o d d d o o o o o o
3111 row 4 |o o o d d d o o o o o o
3112 row 5 |o o o d d d o o o o o o
3113 --------------------------
3114 .ve
3115
3116 Thus, any entries in the d locations are stored in the d (diagonal)
3117 submatrix, and any entries in the o locations are stored in the
3118 o (off-diagonal) submatrix. Note that the d and the o submatrices are
3119 stored simply in the `MATSEQBAIJ` format for compressed row storage.
3120
3121 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3122 and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3123 In general, for PDE problems in which most nonzeros are near the diagonal,
3124 one expects `d_nz` >> `o_nz`.
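
  Example Usage:
  A minimal sketch (the block size, global sizes M and N, and preallocation counts are illustrative placeholders):
.vb
  Mat A;
  PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, 3, PETSC_DECIDE, PETSC_DECIDE, M, N, 5, NULL, 2, NULL, &A));
  /* ... set entries with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve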
3125
3126 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3127 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3128 @*/
3129 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3130 {
3131 PetscMPIInt size;
3132
3133 PetscFunctionBegin;
3134 PetscCall(MatCreate(comm, A));
3135 PetscCall(MatSetSizes(*A, m, n, M, N));
3136 PetscCallMPI(MPI_Comm_size(comm, &size));
3137 if (size > 1) {
3138 PetscCall(MatSetType(*A, MATMPIBAIJ));
3139 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3140 } else {
3141 PetscCall(MatSetType(*A, MATSEQBAIJ));
3142 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3143 }
3144 PetscFunctionReturn(PETSC_SUCCESS);
3145 }
3146
3147 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3148 {
3149 Mat mat;
3150 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3151 PetscInt len = 0;
3152
3153 PetscFunctionBegin;
3154 *newmat = NULL;
3155 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3156 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3157 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3158
3159 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3160 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3161 if (matin->hash_active) {
3162 PetscCall(MatSetUp(mat));
3163 } else {
3164 mat->factortype = matin->factortype;
3165 mat->preallocated = PETSC_TRUE;
3166 mat->assembled = PETSC_TRUE;
3167 mat->insertmode = NOT_SET_VALUES;
3168
3169 a = (Mat_MPIBAIJ *)mat->data;
3170 mat->rmap->bs = matin->rmap->bs;
3171 a->bs2 = oldmat->bs2;
3172 a->mbs = oldmat->mbs;
3173 a->nbs = oldmat->nbs;
3174 a->Mbs = oldmat->Mbs;
3175 a->Nbs = oldmat->Nbs;
3176
3177 a->size = oldmat->size;
3178 a->rank = oldmat->rank;
3179 a->donotstash = oldmat->donotstash;
3180 a->roworiented = oldmat->roworiented;
3181 a->rowindices = NULL;
3182 a->rowvalues = NULL;
3183 a->getrowactive = PETSC_FALSE;
3184 a->barray = NULL;
3185 a->rstartbs = oldmat->rstartbs;
3186 a->rendbs = oldmat->rendbs;
3187 a->cstartbs = oldmat->cstartbs;
3188 a->cendbs = oldmat->cendbs;
3189
3190 /* hash table stuff */
3191 a->ht = NULL;
3192 a->hd = NULL;
3193 a->ht_size = 0;
3194 a->ht_flag = oldmat->ht_flag;
3195 a->ht_fact = oldmat->ht_fact;
3196 a->ht_total_ct = 0;
3197 a->ht_insert_ct = 0;
3198
3199 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3200 if (oldmat->colmap) {
3201 #if defined(PETSC_USE_CTABLE)
3202 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3203 #else
3204 PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3205 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3206 #endif
3207 } else a->colmap = NULL;
3208
3209 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3210 PetscCall(PetscMalloc1(len, &a->garray));
3211 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3212 } else a->garray = NULL;
3213
3214 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3215 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3216 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3217
3218 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3219 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3220 }
3221 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3222 *newmat = mat;
3223 PetscFunctionReturn(PETSC_SUCCESS);
3224 }
3225
3226 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3227 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3228 {
3229 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3230 PetscInt *rowidxs, *colidxs, rs, cs, ce;
3231 PetscScalar *matvals;
3232
3233 PetscFunctionBegin;
3234 PetscCall(PetscViewerSetUp(viewer));
3235
3236 /* read in matrix header */
3237 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3238 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3239 M = header[1];
3240 N = header[2];
3241 nz = header[3];
3242 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3243 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3244 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3245
3246 /* set block sizes from the viewer's .info file */
3247 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3248 /* set local sizes if not set already */
3249 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3250 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3251 /* set global sizes if not set already */
3252 if (mat->rmap->N < 0) mat->rmap->N = M;
3253 if (mat->cmap->N < 0) mat->cmap->N = N;
3254 PetscCall(PetscLayoutSetUp(mat->rmap));
3255 PetscCall(PetscLayoutSetUp(mat->cmap));
3256
3257 /* check if the matrix sizes are correct */
3258 PetscCall(MatGetSize(mat, &rows, &cols));
3259 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3260 PetscCall(MatGetBlockSize(mat, &bs));
3261 PetscCall(MatGetLocalSize(mat, &m, &n));
3262 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3263 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3264 mbs = m / bs;
3265 nbs = n / bs;
3266
3267 /* read in row lengths and build row indices */
3268 PetscCall(PetscMalloc1(m + 1, &rowidxs));
3269 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3270 rowidxs[0] = 0;
3271 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3272 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3273 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3274
3275 /* read in column indices and matrix values */
3276 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3277 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3278 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3279
3280 { /* preallocate matrix storage */
3281 PetscBT bt; /* helper bit set to count diagonal nonzeros */
3282 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3283 PetscBool sbaij, done;
3284 PetscInt *d_nnz, *o_nnz;
3285
3286 PetscCall(PetscBTCreate(nbs, &bt));
3287 PetscCall(PetscHSetICreate(&ht));
3288 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3289 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3290 for (i = 0; i < mbs; i++) {
3291 PetscCall(PetscBTMemzero(nbs, bt));
3292 PetscCall(PetscHSetIClear(ht));
3293 for (k = 0; k < bs; k++) {
3294 PetscInt row = bs * i + k;
3295 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3296 PetscInt col = colidxs[j];
3297 if (!sbaij || col >= row) {
3298 if (col >= cs && col < ce) {
3299 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3300 } else {
3301 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3302 if (done) o_nnz[i]++;
3303 }
3304 }
3305 }
3306 }
3307 }
3308 PetscCall(PetscBTDestroy(&bt));
3309 PetscCall(PetscHSetIDestroy(&ht));
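    /* this routine serves both MPIBAIJ and MPISBAIJ; only the preallocation call matching the actual matrix type takes effect */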
3310 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3311 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3312 PetscCall(PetscFree2(d_nnz, o_nnz));
3313 }
3314
3315 /* store matrix values */
3316 for (i = 0; i < m; i++) {
3317 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3318 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3319 }
3320
3321 PetscCall(PetscFree(rowidxs));
3322 PetscCall(PetscFree2(colidxs, matvals));
3323 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3324 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3325 PetscFunctionReturn(PETSC_SUCCESS);
3326 }
3327
3328 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3329 {
3330 PetscBool isbinary;
3331
3332 PetscFunctionBegin;
3333 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3334 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3335 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3336 PetscFunctionReturn(PETSC_SUCCESS);
3337 }
3338
3339 /*@
3340 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table
3341
3342 Input Parameters:
3343 + mat - the matrix
3344 - fact - factor
3345
3346 Options Database Key:
3347 . -mat_use_hash_table <fact> - provide the factor
3348
3349 Level: advanced
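
  Example Usage:
  A minimal sketch (the factor value shown is illustrative):
.vb
  PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));
  PetscCall(MatMPIBAIJSetHashTableFactor(A, 1.4));
.ve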
3350
3351 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3352 @*/
3353 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3354 {
3355 PetscFunctionBegin;
3356 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3357 PetscFunctionReturn(PETSC_SUCCESS);
3358 }
3359
3360 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3361 {
3362 Mat_MPIBAIJ *baij;
3363
3364 PetscFunctionBegin;
3365 baij = (Mat_MPIBAIJ *)mat->data;
3366 baij->ht_fact = fact;
3367 PetscFunctionReturn(PETSC_SUCCESS);
3368 }
3369
3370 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3371 {
3372 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3373 PetscBool flg;
3374
3375 PetscFunctionBegin;
3376 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3377 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3378 if (Ad) *Ad = a->A;
3379 if (Ao) *Ao = a->B;
3380 if (colmap) *colmap = a->garray;
3381 PetscFunctionReturn(PETSC_SUCCESS);
3382 }
3383
3384 /*
3385 Special version for direct calls from Fortran (to eliminate two function call overheads)
3386 */
3387 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3388 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3389 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3390 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3391 #endif
3392
3393 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3394 /*@C
3395 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3396
3397 Collective
3398
3399 Input Parameters:
3400 + matin - the matrix
3401 . min - number of input rows
3402 . im - input rows
3403 . nin - number of input columns
3404 . in - input columns
3405 . v - numerical values input
3406 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3407
3408 Level: advanced
3409
3410 Developer Notes:
3411 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3412
3413 .seealso: `Mat`, `MatSetValuesBlocked()`
3414 @*/
3415 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3416 {
3417 /* convert input arguments to C version */
3418 Mat mat = *matin;
3419 PetscInt m = *min, n = *nin;
3420 InsertMode addv = *addvin;
3421
3422 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
3423 const MatScalar *value;
3424 MatScalar *barray = baij->barray;
3425 PetscBool roworiented = baij->roworiented;
3426 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
3427 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3428 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3429
3430 PetscFunctionBegin;
3431 /* tasks normally handled by MatSetValuesBlocked() */
3432 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3433 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3434 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3435 if (mat->assembled) {
3436 mat->was_assembled = PETSC_TRUE;
3437 mat->assembled = PETSC_FALSE;
3438 }
3439 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3440
3441 if (!barray) {
3442 PetscCall(PetscMalloc1(bs2, &barray));
3443 baij->barray = barray;
3444 }
3445
3446 if (roworiented) stepval = (n - 1) * bs;
3447 else stepval = (m - 1) * bs;
3448
3449 for (i = 0; i < m; i++) {
3450 if (im[i] < 0) continue;
3451 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3452 if (im[i] >= rstart && im[i] < rend) {
3453 row = im[i] - rstart;
3454 for (j = 0; j < n; j++) {
3455 /* If NumCol = 1 then a copy is not required */
3456 if ((roworiented) && (n == 1)) {
3457 barray = (MatScalar *)v + i * bs2;
3458 } else if ((!roworiented) && (m == 1)) {
3459 barray = (MatScalar *)v + j * bs2;
3460 } else { /* Here a copy is required */
3461 if (roworiented) {
3462 value = v + i * (stepval + bs) * bs + j * bs;
3463 } else {
3464 value = v + j * (stepval + bs) * bs + i * bs;
3465 }
3466 for (ii = 0; ii < bs; ii++, value += stepval) {
3467 for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3468 }
3469 barray -= bs2;
3470 }
3471
3472 if (in[j] >= cstart && in[j] < cend) {
3473 col = in[j] - cstart;
3474 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3475 } else if (in[j] < 0) {
3476 continue;
3477 } else {
3478 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3479 if (mat->was_assembled) {
3480 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3481
3482 #if defined(PETSC_USE_DEBUG)
3483 #if defined(PETSC_USE_CTABLE)
3484 {
3485 PetscInt data;
3486 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3487 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3488 }
3489 #else
3490 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3491 #endif
3492 #endif
3493 #if defined(PETSC_USE_CTABLE)
3494 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3495 col = (col - 1) / bs;
3496 #else
3497 col = (baij->colmap[in[j]] - 1) / bs;
3498 #endif
3499 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3500 PetscCall(MatDisAssemble_MPIBAIJ(mat));
3501 col = in[j];
3502 }
3503 } else col = in[j];
3504 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3505 }
3506 }
3507 } else {
3508 if (!baij->donotstash) {
3509 if (roworiented) {
3510 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3511 } else {
3512 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3513 }
3514 }
3515 }
3516 }
3517
3518 /* task normally handled by MatSetValuesBlocked() */
3519 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3520 PetscFunctionReturn(PETSC_SUCCESS);
3521 }
3522
3523 /*@
3524 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block compressed sparse row (CSR) format.
3525
3526 Collective
3527
3528 Input Parameters:
3529 + comm - MPI communicator
3530 . bs - the block size, only a block size of 1 is supported
3531 . m - number of local rows (Cannot be `PETSC_DECIDE`)
3532 . n - This value should be the same as the local size used in creating the
3533 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3534 calculated if `N` is given) For square matrices `n` is almost always `m`.
3535 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3536 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3537 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + the number of block elements in the row-th block row of the matrix
3538 . j - column indices
3539 - a - matrix values
3540
3541 Output Parameter:
3542 . mat - the matrix
3543
3544 Level: intermediate
3545
3546 Notes:
3547 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3548 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3549 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3550
3551 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3552 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3553 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
3554 with column-major ordering within blocks.
3555
3556 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
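
  Example Usage:
  A minimal sketch for a single process owning 3 local rows with bs = 1, matching the block-size restriction stated above (all values are illustrative):
.vb
  PetscInt    i[] = {0, 2, 3, 5};    /* i[row+1] - i[row] = number of blocks in block row 'row' */
  PetscInt    j[] = {0, 1, 1, 0, 2}; /* block column indices                                    */
  PetscScalar a[] = {1, 2, 3, 4, 5}; /* one bs x bs block per entry of j[]                      */
  Mat         A;
  PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 3, 3, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve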
3557
3558 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3559 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3560 @*/
3561 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3562 {
3563 PetscFunctionBegin;
3564 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3565 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3566 PetscCall(MatCreate(comm, mat));
3567 PetscCall(MatSetSizes(*mat, m, n, M, N));
3568 PetscCall(MatSetType(*mat, MATMPIBAIJ));
3569 PetscCall(MatSetBlockSize(*mat, bs));
3570 PetscCall(MatSetUp(*mat));
3571 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3572 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3573 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3574 PetscFunctionReturn(PETSC_SUCCESS);
3575 }
3576
3577 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3578 {
3579 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs;
3580 PetscInt *indx;
3581 PetscScalar *values;
3582
3583 PetscFunctionBegin;
3584 PetscCall(MatGetSize(inmat, &m, &N));
3585 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3586 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3587 PetscInt *dnz, *onz, mbs, Nbs, nbs;
3588 PetscInt *bindx, rmax = a->rmax, j;
3589 PetscMPIInt rank, size;
3590
3591 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3592 mbs = m / bs;
3593 Nbs = N / cbs;
3594 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3595 nbs = n / cbs;
3596
3597 PetscCall(PetscMalloc1(rmax, &bindx));
3598 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3599
3600 PetscCallMPI(MPI_Comm_rank(comm, &rank));
3601 PetscCallMPI(MPI_Comm_size(comm, &size));
3602 if (rank == size - 1) {
3603 /* Check sum(nbs) = Nbs */
3604 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3605 }
3606
3607 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3608 for (i = 0; i < mbs; i++) {
3609 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3610 nnz = nnz / bs;
3611 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3612 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3613 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3614 }
3615 PetscCall(PetscFree(bindx));
3616
3617 PetscCall(MatCreate(comm, outmat));
3618 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3619 PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3620 PetscCall(MatSetType(*outmat, MATBAIJ));
3621 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3622 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3623 MatPreallocateEnd(dnz, onz);
3624 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3625 }
3626
3627 /* numeric phase */
3628 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3629 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3630
3631 for (i = 0; i < m; i++) {
3632 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3633 Ii = i + rstart;
3634 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3635 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3636 }
3637 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3638 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3639 PetscFunctionReturn(PETSC_SUCCESS);
3640 }
3641