xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5a884c48ab0c46bab83cd9bb8710f380fa6d8bcf)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
/* Destroys an MPIBAIJ matrix: frees both sequential blocks, all communication
   scratch storage, and un-registers every composed function for this type. */
static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  /* Record the global dimensions in the object state log before teardown */
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  /* Release the stashes that hold off-process entries (point and block variants) */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  /* Destroy the local diagonal (A) and off-diagonal (B) sequential blocks */
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap storage differs by build: hash table under PETSC_USE_CTABLE, dense array otherwise */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  /* hd/ht are the hash-table arrays used by the *_HT MatSetValues paths */
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* Clear the type name and remove all composed functions registered for MPIBAIJ */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
/* Computes, for each local row, the entry of maximum absolute value (into v) and
   optionally its global column index (into idx).  Combines the per-row maxima of
   the diagonal block A and the off-diagonal block B; on an exact tie the smaller
   global column index wins. */
static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  /* Row maxima of the diagonal block; idx comes back with local column indices */
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    /* Shift the diagonal-block indices to global numbering (skip all-zero rows) */
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* Row maxima of the off-diagonal block, with local (point) column indices in idxb */
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      /* Off-diagonal entry dominates: map its local point column through garray
         (block index idxb[i]/bs) back to the global point column */
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      /* On an exact tie, prefer the smaller global column index */
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
99 
MatGetRowSumAbs_MPIBAIJ(Mat A,Vec v)100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
MatStoreValues_MPIBAIJ(Mat mat)117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
MatRetrieveValues_MPIBAIJ(Mat mat)127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Hash variant: key = global block column + 1, value = local point column + 1.
     The +1 shifts keep 0 free to mean "not present". */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* Dense variant: one entry per global block column; zero means "not present" */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
159 
/* Inserts (or adds, per addv) a single point value into the diagonal SeqBAIJ block.
   row/col are local point indices; orow/ocol are the original global indices, used
   only in error messages.  Relies on the caller having the SeqBAIJ working
   variables (bs, bs2, a, aa, ai, aj, aimax, ailen, rp, ap, bap, brow, bcol, ridx,
   cidx, low, high, t, _i, nrow, rmax, N, A) in scope.  Searches the block row with
   a binary-then-linear scan and, when a new nonzero block is needed, reallocates
   via MatSeqXAIJReallocateAIJ and shifts later entries up. */
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
199 
/* Off-diagonal counterpart of MatSetValues_SeqBAIJ_A_Private(): inserts (or adds)
   a single point value into the off-diagonal SeqBAIJ block B, using the b-prefixed
   working variables (ba, bi, bj, bimax, bilen) the caller has in scope.
   Identical search/reallocate/shift logic, with its own b_noinsert label. */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
239 
/* Inserts or adds point-indexed values into an MPIBAIJ matrix.  Locally owned
   rows are routed to the diagonal block (A) or off-diagonal block (B) through the
   MatSetValues_SeqBAIJ_{A,B}_Private() macros; off-process rows are stashed for
   communication during assembly.  Negative row or column indices are ignored. */
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* row is owned by this process */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently skipped */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* Translate global column to local off-diagonal column through colmap
               (entries are stored shifted by +1, so 0 means "not present") */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* New off-diagonal block column: revert to the disassembled form
                 where B uses global column numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the point offset within the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash it for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
/* Inserts or adds one bs x bs block of values v into a SeqBAIJ matrix at local
   block position (row, col).  orow/ocol are the original (global) block indices,
   used only in error messages.  v is read in row-major or column-major order
   depending on a->roworiented; the stored block layout is column-major
   (bap[jj] with jj stepping by bs walks a storage row). */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* start of block row in the column-index array */
  ap    = aa + bs2 * ai[row]; /* start of block row in the value array */
  rmax  = imax[row];          /* allocated blocks in this row */
  nrow  = ailen[row];         /* used blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;
  /* Binary search narrows the range, then a linear scan finishes */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already present: update it in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  /* Block not found: insert a new one unless new nonzeros are disallowed */
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
402 
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine

    Inserts or adds m x n blocks of size bs x bs into an MPIBAIJ matrix using
    block (not point) indices.  Locally owned block rows are forwarded to the
    diagonal (A) or off-diagonal (B) sequential block; off-process block rows
    are stashed in mat->bstash for assembly-time communication.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray; /* bs2-sized scratch block, allocated lazily */
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stepval: distance (in scalars) from the end of one block row of v to the
     start of the next, given v's orientation */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row is owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          /* gather the strided bs x bs block into the contiguous scratch buffer */
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* column block in the diagonal part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are silently skipped */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* Translate global block column to local off-diagonal block column
               via colmap (stored shifted by +1; <1 means "not present") */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* new off-diagonal block column: fall back to disassembled form */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash it for assembly communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
487 
/* Multiplicative hashing: HASH maps a positive integer key into [0, size) using
   the fractional part of key*HASH_KEY, where HASH_KEY is the golden-ratio
   conjugate.  tmp is a caller-supplied PetscReal scratch variable (the macro
   evaluates to an expression, so it needs external scratch storage). */
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* Hash-table variant of MatSetValues for MPIBAIJ: the location of each bs x bs
   block is found through the preallocated hash table (baij->ht keys, baij->hd
   value pointers) instead of a search of the row, so NO new nonzero blocks can
   be created here -- a missing table entry is an error.  Off-process rows are
   stashed as usual.  In debug builds the probe counts are accumulated into
   baij->ht_total_ct / ht_insert_ct. */
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* row is owned by this process */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table: key encodes the (block row, block col) pair */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* Debug path: identical linear probing, but counts probes */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              /* wrap around to the front of the table */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* Linear probing with wrap-around until the key is found */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address
           (blocks are stored column-major: offset = (col%bs)*bs + row%bs) */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
556 
MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
/* Retrieves point-indexed values from an MPIBAIJ matrix into v (row-major,
   v[i*n+j]).  Only locally owned rows may be queried; negative indices are
   skipped; columns not stored locally read back as 0.0. */
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        /* column in the diagonal block */
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal column: translate through colmap (stored shifted by +1) */
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        /* absent, or the colmap slot maps to a different block column: value is 0 */
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs; /* local point column within the off-diagonal block */
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
688 
/*
  Computes a norm of an MPIBAIJ matrix from its locally stored diagonal (A)
  and off-diagonal (B) blocks, reducing across the matrix communicator.
  Supports NORM_FROBENIUS, NORM_1 (max column sum), and NORM_INFINITY (max row sum).
*/
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the whole matrix lives in the A block */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over every locally stored entry of A and B, then global sum and sqrt */
      v  = amat->a;
      nz = amat->nz * bs2; /* nz counts blocks; bs2 scalar entries per block */
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      Vec          col, bcol;    /* NOTE: this Vec col shadows the PetscInt col declared above */
      PetscScalar *array;
      PetscInt    *jj, *garray = baij->garray;

      /* accumulate |entry| per column of the A block directly into a parallel vector */
      PetscCall(MatCreateVecs(mat, &col, NULL));
      PetscCall(VecSet(col, 0.0));
      PetscCall(VecGetArrayWrite(col, &array));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) { /* one iteration per bs-by-bs block */
        for (j = 0; j < bs; j++) {
          PetscInt col = bs * *jj + j; /* column index */

          for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
        }
        jj++;
      }
      PetscCall(VecRestoreArrayWrite(col, &array));
      /* accumulate the B block into a local work vector indexed by local (ghost) columns */
      PetscCall(MatCreateVecs(baij->B, &bcol, NULL));
      PetscCall(VecSet(bcol, 0.0));
      PetscCall(VecGetArrayWrite(bcol, &array));
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          PetscInt col = bs * *jj + j; /* column index */

          for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
        }
        jj++;
      }
      /* map the ghost-column sums onto their owning global block columns via garray */
      PetscCall(VecSetValuesBlocked(col, bmat->nbs, garray, array, ADD_VALUES));
      PetscCall(VecRestoreArrayWrite(bcol, &array));
      PetscCall(VecDestroy(&bcol));
      PetscCall(VecAssemblyBegin(col));
      PetscCall(VecAssemblyEnd(col));
      /* the 1-norm is the largest of the per-column absolute sums */
      PetscCall(VecNorm(col, NORM_INFINITY, nrm));
      PetscCall(VecDestroy(&col));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* walk block rows; sums[] holds the bs scalar-row sums within the current block row */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) { /* blocks stored column-major: col outer, row inner */
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      /* rows are owned entirely by one process, so a global MAX suffices */
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
795   If new entries need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;  /* HT[i]: key occupying slot i (0 == empty); HD[i]: pointer to that block's values */
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* probe-collision statistics, reported at the end */
#endif

  PetscFunctionBegin;
  /* table is built only once; rebuild requires destroying it first */
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  /* oversize the table by 'factor' relative to the number of stored blocks
     to keep the open-addressing probe sequences short */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (calloc: all slots start empty/zero) */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A: insert each diagonal-block entry keyed by its global (block row, block col) */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1; /* +1 so a valid key is never 0 (0 marks an empty slot) */
      h1  = HASH(ht_size, key, tmp);
      /* linear probing from the hashed slot until a free slot is found */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B: same insertion, but global block columns come from garray */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary: average and worst-case probe lengths observed during construction */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
882 
MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
/*
  Finishes assembly: drains the stash messages (inserting the received
  off-process values), assembles the A and B blocks, handles collective
  disassembly, sets up the multiply machinery on first final assembly, and
  optionally installs the hash-table fast path for MatSetValues.
*/
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, all_assembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point-wise stash: each message delivers (row,col,val) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stash slot i carries a whole bs2-sized block, hence val + i * bs2 */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  /* first final assembly: build colmap/garray/scatter needed for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* user requested the hash-table path: build it now and reroute SetValues */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* work buffers used by MatGetRow are invalidated by assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1006 
1007 #include <petscdraw.h>
/*
  Views an MPIBAIJ matrix on ASCII, draw, or socket viewers. Info formats are
  printed per rank; otherwise the entire matrix is gathered onto rank 0 and
  viewed there (all ranks participate so graphics stay synchronized).
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-rank local sizes, nonzero counts, and the scatter info */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_FACTOR_INFO) PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns all rows of the gathered copy; the others own none */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global scalar-row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* blocks are stored column-major: each k inserts one scalar column of the block */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* B's local block columns map to global block columns through garray */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1123 
1124 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Used for both MPIBAIJ and MPISBAIJ matrices.
   Writes the matrix in the PETSc binary format: a header, per-row lengths,
   column indices, and values, with every row expanded to scalar (non-block)
   CSR form and its columns emitted in globally ascending order. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray; /* local-to-global map for B's block columns */
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local count of scalar nonzeros */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths: each of the bs scalar rows in block row i
     has the same length, bs * (blocks in A + blocks in B) */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices in ascending global order:
     B-columns left of the diagonal block, then A (diagonal block), then the
     remaining B-columns to the right */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break; /* reached B-columns right of the diagonal block */
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values in the same interleaved order;
     blocks are column-major, so scalar row k of block jb/ja is a->a[bs*(bs*j + l) + k] */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1203 
MatView_MPIBAIJ(Mat mat,PetscViewer viewer)1204 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1205 {
1206   PetscBool isascii, isdraw, issocket, isbinary;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1210   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1211   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1212   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1213   if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1214   else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1215   PetscFunctionReturn(PETSC_SUCCESS);
1216 }
1217 
MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)1218 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1219 {
1220   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1221   PetscInt     nt;
1222 
1223   PetscFunctionBegin;
1224   PetscCall(VecGetLocalSize(xx, &nt));
1225   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1226   PetscCall(VecGetLocalSize(yy, &nt));
1227   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1228   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1229   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1230   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1231   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1232   PetscFunctionReturn(PETSC_SUCCESS);
1233 }
1234 
MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)1235 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1236 {
1237   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1238 
1239   PetscFunctionBegin;
1240   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1241   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1242   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1243   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1244   PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246 
MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)1247 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1248 {
1249   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1250 
1251   PetscFunctionBegin;
1252   /* do nondiagonal part */
1253   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1254   /* do local part */
1255   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1256   /* add partial results together */
1257   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1258   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1259   PetscFunctionReturn(PETSC_SUCCESS);
1260 }
1261 
MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)1262 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1263 {
1264   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1265 
1266   PetscFunctionBegin;
1267   /* do nondiagonal part */
1268   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1269   /* do local part */
1270   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1271   /* add partial results together */
1272   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1273   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276 
1277 /*
1278   This only works correctly for square matrices where the subblock A->A is the
1279    diagonal block
1280 */
MatGetDiagonal_MPIBAIJ(Mat A,Vec v)1281 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1282 {
1283   PetscFunctionBegin;
1284   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1285   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1286   PetscFunctionReturn(PETSC_SUCCESS);
1287 }
1288 
MatScale_MPIBAIJ(Mat A,PetscScalar aa)1289 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1290 {
1291   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1292 
1293   PetscFunctionBegin;
1294   PetscCall(MatScale(a->A, aa));
1295   PetscCall(MatScale(a->B, aa));
1296   PetscFunctionReturn(PETSC_SUCCESS);
1297 }
1298 
/*
  Returns one locally owned row of the matrix in global numbering, with its
  columns sorted ascending by merging the (already sorted) rows of the A and
  B blocks. Results point into per-matrix work buffers; the caller must pair
  this with MatRestoreRow.
*/
static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* total blocks in block row i */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local scalar row index */

  /* only request the column/value arrays the caller actually wants */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps B's local block columns to global block columns */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];     /* diagonal-block values in the middle */
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];   /* remaining B values on the right */
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was NULL: find imark here instead */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1381 
MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1382 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1383 {
1384   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1385 
1386   PetscFunctionBegin;
1387   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1388   baij->getrowactive = PETSC_FALSE;
1389   PetscFunctionReturn(PETSC_SUCCESS);
1390 }
1391 
MatZeroEntries_MPIBAIJ(Mat A)1392 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1393 {
1394   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1395 
1396   PetscFunctionBegin;
1397   PetscCall(MatZeroEntries(l->A));
1398   PetscCall(MatZeroEntries(l->B));
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo * info)1402 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1403 {
1404   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1405   Mat            A = a->A, B = a->B;
1406   PetscLogDouble isend[5], irecv[5];
1407 
1408   PetscFunctionBegin;
1409   info->block_size = (PetscReal)matin->rmap->bs;
1410 
1411   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1412 
1413   isend[0] = info->nz_used;
1414   isend[1] = info->nz_allocated;
1415   isend[2] = info->nz_unneeded;
1416   isend[3] = info->memory;
1417   isend[4] = info->mallocs;
1418 
1419   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1420 
1421   isend[0] += info->nz_used;
1422   isend[1] += info->nz_allocated;
1423   isend[2] += info->nz_unneeded;
1424   isend[3] += info->memory;
1425   isend[4] += info->mallocs;
1426 
1427   if (flag == MAT_LOCAL) {
1428     info->nz_used      = isend[0];
1429     info->nz_allocated = isend[1];
1430     info->nz_unneeded  = isend[2];
1431     info->memory       = isend[3];
1432     info->mallocs      = isend[4];
1433   } else if (flag == MAT_GLOBAL_MAX) {
1434     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1435 
1436     info->nz_used      = irecv[0];
1437     info->nz_allocated = irecv[1];
1438     info->nz_unneeded  = irecv[2];
1439     info->memory       = irecv[3];
1440     info->mallocs      = irecv[4];
1441   } else if (flag == MAT_GLOBAL_SUM) {
1442     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1443 
1444     info->nz_used      = irecv[0];
1445     info->nz_allocated = irecv[1];
1446     info->nz_unneeded  = irecv[2];
1447     info->memory       = irecv[3];
1448     info->mallocs      = irecv[4];
1449   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1450   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1451   info->fill_ratio_needed = 0;
1452   info->factor_mallocs    = 0;
1453   PetscFunctionReturn(PETSC_SUCCESS);
1454 }
1455 
MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)1456 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1457 {
1458   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1459 
1460   PetscFunctionBegin;
1461   switch (op) {
1462   case MAT_NEW_NONZERO_LOCATIONS:
1463   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1464   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1465   case MAT_KEEP_NONZERO_PATTERN:
1466   case MAT_NEW_NONZERO_LOCATION_ERR:
1467     MatCheckPreallocated(A, 1);
1468     PetscCall(MatSetOption(a->A, op, flg));
1469     PetscCall(MatSetOption(a->B, op, flg));
1470     break;
1471   case MAT_ROW_ORIENTED:
1472     MatCheckPreallocated(A, 1);
1473     a->roworiented = flg;
1474 
1475     PetscCall(MatSetOption(a->A, op, flg));
1476     PetscCall(MatSetOption(a->B, op, flg));
1477     break;
1478   case MAT_IGNORE_OFF_PROC_ENTRIES:
1479     a->donotstash = flg;
1480     break;
1481   case MAT_USE_HASH_TABLE:
1482     a->ht_flag = flg;
1483     a->ht_fact = 1.39;
1484     break;
1485   case MAT_SYMMETRIC:
1486   case MAT_STRUCTURALLY_SYMMETRIC:
1487   case MAT_HERMITIAN:
1488   case MAT_SYMMETRY_ETERNAL:
1489   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1490   case MAT_SPD_ETERNAL:
1491     /* if the diagonal matrix is square it inherits some of the properties above */
1492     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1493     break;
1494   default:
1495     break;
1496   }
1497   PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499 
/*
  Form the transpose of an MPIBAIJ matrix.

  Strategy: walk each local block row of both the diagonal (A) and
  off-diagonal (B) parts and insert every column of each bs x bs block as one
  row-slice of the result using MatSetValues_MPIBAIJ() with INSERT_VALUES.
  For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX a fresh matrix of the same type is
  created with no preallocation information (only the block size is set), so
  inserts may allocate; for MAT_REUSE_MATRIX the caller's matrix is filled in.
*/
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals[] lists the bs global point-row indices of block row i; these become columns of B */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* global point-column of the first column in this block (diagonal part uses cstartbs offset) */
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* each column of the bs x bs block becomes one row of the transpose */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      /* off-diagonal columns are stored compressed; garray maps back to global block columns */
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1566 
MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)1567 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1568 {
1569   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1570   Mat          a = baij->A, b = baij->B;
1571   PetscInt     s1, s2, s3;
1572 
1573   PetscFunctionBegin;
1574   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1575   if (rr) {
1576     PetscCall(VecGetLocalSize(rr, &s1));
1577     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1578     /* Overlap communication with computation. */
1579     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1580   }
1581   if (ll) {
1582     PetscCall(VecGetLocalSize(ll, &s1));
1583     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1584     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1585   }
1586   /* scale  the diagonal block */
1587   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1588 
1589   if (rr) {
1590     /* Do a scatter end and then right scale the off-diagonal block */
1591     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1592     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1593   }
1594   PetscFunctionReturn(PETSC_SUCCESS);
1595 }
1596 
/*
  Zero the given global rows of an MPIBAIJ matrix, optionally placing `diag`
  on the diagonal. When both x and b are given, b is fixed up so that
  b[row] = diag * x[row] for every zeroed local row, keeping x a solution of
  the modified system.
*/
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the diagonal entry lives in l->A, set it there directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* incongruent layouts: zero first, then insert the diagonal entries one by one */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* the scattered MatSetValues() calls above require a fresh assembly */
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1653 
/*
  Zero the given global rows AND the matching columns of an MPIBAIJ matrix,
  optionally placing `diag` on the diagonal and adjusting b for a known x.

  The diagonal block is handled by MatZeroRowsColumns() on l->A; the
  off-diagonal block is edited in place on its raw BAIJ value array, using a
  scattered mask vector to identify which ghost columns must be zeroed.
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  PetscCall(PetscMPIIntCast(A->rmap->n, &n));
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* mark zeroed local rows with 1 in xmask, then scatter so each rank learns
     which of its ghost columns correspond to zeroed rows elsewhere */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed below to update b for the zeroed columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* aa points at the first entry of point-row `row` inside its block row;
       consecutive entries of that point-row are bs apart in the block storage */
    aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = baij->a + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1750 
MatSetUnfactored_MPIBAIJ(Mat A)1751 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1752 {
1753   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1754 
1755   PetscFunctionBegin;
1756   PetscCall(MatSetUnfactored(a->A));
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1761 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool * flag)1762 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1763 {
1764   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1765   Mat          a, b, c, d;
1766   PetscBool    flg;
1767 
1768   PetscFunctionBegin;
1769   a = matA->A;
1770   b = matA->B;
1771   c = matB->A;
1772   d = matB->B;
1773 
1774   PetscCall(MatEqual(a, c, &flg));
1775   if (flg) PetscCall(MatEqual(b, d, &flg));
1776   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1777   PetscFunctionReturn(PETSC_SUCCESS);
1778 }
1779 
MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)1780 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1781 {
1782   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1783   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1784 
1785   PetscFunctionBegin;
1786   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1787   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1788     PetscCall(MatCopy_Basic(A, B, str));
1789   } else {
1790     PetscCall(MatCopy(a->A, b->A, str));
1791     PetscCall(MatCopy(a->B, b->B, str));
1792   }
1793   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt * yltog,Mat X,const PetscInt * xltog,PetscInt * nnz)1797 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1798 {
1799   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1800   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1801   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1802 
1803   PetscFunctionBegin;
1804   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
/*
  Compute Y = a*X + Y for MPIBAIJ matrices.

  - SAME_NONZERO_PATTERN: the value arrays of the corresponding blocks line up
    entry for entry, so one BLAS axpy per block suffices.
  - SUBSET_NONZERO_PATTERN: deferred to MatAXPY_Basic().
  - DIFFERENT_NONZERO_PATTERN: a new matrix with the merged pattern is
    preallocated, filled, and then swapped into Y via MatHeaderMerge().
*/
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: nz block entries of bs2 scalars each */
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    /* off-diagonal blocks */
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    /* build a new matrix preallocated for the union of X's and Y's patterns */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's guts with B's, keeping Y's object identity for the caller */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1850 
MatConjugate_MPIBAIJ(Mat mat)1851 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1852 {
1853   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1854 
1855   PetscFunctionBegin;
1856   PetscCall(MatConjugate_SeqBAIJ(a->A));
1857   PetscCall(MatConjugate_SeqBAIJ(a->B));
1858   PetscFunctionReturn(PETSC_SUCCESS);
1859 }
1860 
MatRealPart_MPIBAIJ(Mat A)1861 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1862 {
1863   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1864 
1865   PetscFunctionBegin;
1866   PetscCall(MatRealPart(a->A));
1867   PetscCall(MatRealPart(a->B));
1868   PetscFunctionReturn(PETSC_SUCCESS);
1869 }
1870 
MatImaginaryPart_MPIBAIJ(Mat A)1871 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1872 {
1873   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1874 
1875   PetscFunctionBegin;
1876   PetscCall(MatImaginaryPart(a->A));
1877   PetscCall(MatImaginaryPart(a->B));
1878   PetscFunctionReturn(PETSC_SUCCESS);
1879 }
1880 
MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat * newmat)1881 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1882 {
1883   IS       iscol_local;
1884   PetscInt csize;
1885 
1886   PetscFunctionBegin;
1887   PetscCall(ISGetLocalSize(iscol, &csize));
1888   if (call == MAT_REUSE_MATRIX) {
1889     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1890     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1891   } else {
1892     PetscCall(ISAllGather(iscol, &iscol_local));
1893   }
1894   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1895   if (call == MAT_INITIAL_MATRIX) {
1896     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1897     PetscCall(ISDestroy(&iscol_local));
1898   }
1899   PetscFunctionReturn(PETSC_SUCCESS);
1900 }
1901 
1902 /*
1903   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1904   in local and then by concatenating the local matrices the end result.
1905   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1906   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1907 */
/*
  Extract a parallel submatrix of a BAIJ/SBAIJ matrix.

  The row/column index sets are first compressed to block indices, a
  sequential copy of the requested rows is gathered on each process via
  MatCreateSubMatrices_MPIBAIJ_local(), and the final parallel matrix is
  assembled from those local pieces. `csize` is the requested local column
  size (PETSC_DECIDE lets this routine choose); `sym` selects the SBAIJ path.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. It does not report
     errors and might change the indices, hence potentially buggy. */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    /* same IS object: reuse the compressed row IS instead of compressing twice */
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  /* convert point sizes to block sizes */
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the processes */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only the call matching M's actual type takes effect; the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  /* walk the gathered sequential matrix block row by block row, handing each
     row's column indices and values to the parallel matrix */
  aij = (Mat_SeqBAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2036 
/*
  Permute the rows and columns of an MPIBAIJ matrix: B = P_row * A * P_col.

  Implemented as a submatrix extraction with permuted index sets. The row
  permutation must be collective on the matrix communicator (a copy is made
  if it is not), and the column permutation must be known in full on every
  process (gathered if it lives on a parallel communicator).
*/
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up: destroy only the index sets this routine created */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2075 
MatGetGhosts_MPIBAIJ(Mat mat,PetscInt * nghosts,const PetscInt * ghosts[])2076 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2077 {
2078   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2079   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2080 
2081   PetscFunctionBegin;
2082   if (nghosts) *nghosts = B->nbs;
2083   if (ghosts) *ghosts = baij->garray;
2084   PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086 
/*
  Build, on every process, a sequential AIJ matrix holding the full block
  nonzero structure of the parallel matrix (one AIJ entry per bs x bs block;
  numerical values are not set).

  Each process writes its own block rows' column indices, in sorted global
  order, directly into its slice of the result's j array, then the slices are
  exchanged with in-place MPI_Allgatherv calls.
*/
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation serves both recvcounts and displs */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
    PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* interleave the off-diagonal (bd) and diagonal (ad) indices so each row
       comes out sorted by global column: bd columns below the diagonal block,
       then ad columns, then the remaining bd columns above */
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2166 
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL; /* work vector: bb with the off-process coupling B*x subtracted */

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER involves only the local (diagonal-block) triangle; delegate directly */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed whenever some iteration starts from an existing x: either more than one
     iteration, or a nonzero initial guess (note: ~flag & MASK is equivalent to !(flag & MASK) here) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* first iteration with a zero guess needs no ghost update: x is zero so B*x vanishes */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of x needed by the off-diagonal block B */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x (lvec negated so MultAdd computes the subtraction) */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2234 
/* Compute a per-column reduction (norm, sum, or mean) over the global matrix.
   Each rank accumulates its local contributions into a length-N work array indexed by
   global column, then an Allreduce combines the per-rank partial results. */
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray; /* garray: off-diagonal block column -> global block column */
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; /* diagonal portion */
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; /* off-diagonal portion */
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  PetscCall(PetscCalloc1(N, &work)); /* zeroed partial results, one slot per global column */
  /* In each branch the value pointers a_val/b_val walk the bs*bs entries of every stored
     block in storage order: jb selects the column within the block (which fixes the global
     column index), ib the row within that column. */
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; note |a*a| == |a|^2 also for complex scalars */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    /* accumulate |a| per column */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    /* track the maximum |a| per column (combined with MPIU_MAX below) */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    /* sum real parts; the mean variant is divided by m after the reduction */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* sum imaginary parts; the mean variant is divided by m after the reduction */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-rank partial results: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* the 2-norm accumulated squares; take the square root now */
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* turn the global sums into means by dividing by the global row count */
    for (i = 0; i < N; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2350 
MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar ** values)2351 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2352 {
2353   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2354 
2355   PetscFunctionBegin;
2356   PetscCall(MatInvertBlockDiagonal(a->A, values));
2357   A->factorerrortype             = a->A->factorerrortype;
2358   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2359   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2360   PetscFunctionReturn(PETSC_SUCCESS);
2361 }
2362 
MatShift_MPIBAIJ(Mat Y,PetscScalar a)2363 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2364 {
2365   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2366   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2367 
2368   PetscFunctionBegin;
2369   if (!Y->preallocated) {
2370     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2371   } else if (!aij->nz) {
2372     PetscInt nonew = aij->nonew;
2373     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2374     aij->nonew = nonew;
2375   }
2376   PetscCall(MatShift_Basic(Y, a));
2377   PetscFunctionReturn(PETSC_SUCCESS);
2378 }
2379 
MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat * a)2380 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2381 {
2382   PetscFunctionBegin;
2383   *a = ((Mat_MPIBAIJ *)A->data)->A;
2384   PetscFunctionReturn(PETSC_SUCCESS);
2385 }
2386 
MatEliminateZeros_MPIBAIJ(Mat A,PetscBool keep)2387 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2388 {
2389   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2390 
2391   PetscFunctionBegin;
2392   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2393   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2394   PetscFunctionReturn(PETSC_SUCCESS);
2395 }
2396 
/* Dispatch table for MATMPIBAIJ. Slot positions are fixed by struct _MatOps
   (the /*NN* / markers give the slot index); never reorder entries, and a NULL
   slot means the operation is unsupported or inherited. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPIBAIJ,
                                       /*69*/ NULL,
                                       NULL,
                                       NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       /*74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       /*94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*114*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       MatSetBlockSizes_Default,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       /*129*/ NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_MPI_Hash,
                                       NULL,
                                       NULL};
2541 
2542 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2543 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2544 
/* Type-specific implementation of MatMPIBAIJSetPreallocationCSR(): counts diagonal and
   off-diagonal blocks per local block row from the CSR structure (ii,jj), preallocates,
   and inserts the (optional) values V with MatSetValuesBlocked. */
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); /* layout may have adjusted bs */
  /* all sizes below are in blocks, not scalar rows/columns */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* split each block row's count into diagonal ([cstart,cend)) and off-diagonal parts */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz); /* longest row, sizes the zero-fill buffer below */
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* if no values were supplied, insert zeros (buffer large enough for the longest row) */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* assemble without the off-process stash exchange: CSR input is purely local */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR structure is exact, so new nonzero locations are programming errors */
  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2613 
2614 /*@C
2615   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2616 
2617   Collective
2618 
2619   Input Parameters:
2620 + B  - the matrix
2621 . bs - the block size
2622 . i  - the indices into `j` for the start of each local row (starts with zero)
2623 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2624 - v  - optional values in the matrix, use `NULL` if not provided
2625 
2626   Level: advanced
2627 
2628   Notes:
2629   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2630   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2631   called this routine.
2632 
2633   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2634   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2635   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2636   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2637   block column and the second index is over columns within a block.
2638 
2639   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2640 
2641 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2642 @*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* bs must be the same on every rank */
  /* dispatch to the type-specific implementation, if this matrix type provides one */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2652 
MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt * d_nnz,PetscInt o_nz,const PetscInt * o_nnz)2653 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2654 {
2655   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2656   PetscInt     i;
2657   PetscMPIInt  size;
2658 
2659   PetscFunctionBegin;
2660   if (B->hash_active) {
2661     B->ops[0]      = b->cops;
2662     B->hash_active = PETSC_FALSE;
2663   }
2664   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2665   PetscCall(MatSetBlockSize(B, bs));
2666   PetscCall(PetscLayoutSetUp(B->rmap));
2667   PetscCall(PetscLayoutSetUp(B->cmap));
2668   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2669 
2670   if (d_nnz) {
2671     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2672   }
2673   if (o_nnz) {
2674     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2675   }
2676 
2677   b->bs2 = bs * bs;
2678   b->mbs = B->rmap->n / bs;
2679   b->nbs = B->cmap->n / bs;
2680   b->Mbs = B->rmap->N / bs;
2681   b->Nbs = B->cmap->N / bs;
2682 
2683   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2684   b->rstartbs = B->rmap->rstart / bs;
2685   b->rendbs   = B->rmap->rend / bs;
2686   b->cstartbs = B->cmap->rstart / bs;
2687   b->cendbs   = B->cmap->rend / bs;
2688 
2689 #if defined(PETSC_USE_CTABLE)
2690   PetscCall(PetscHMapIDestroy(&b->colmap));
2691 #else
2692   PetscCall(PetscFree(b->colmap));
2693 #endif
2694   PetscCall(PetscFree(b->garray));
2695   PetscCall(VecDestroy(&b->lvec));
2696   PetscCall(VecScatterDestroy(&b->Mvctx));
2697 
2698   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2699 
2700   MatSeqXAIJGetOptions_Private(b->B);
2701   PetscCall(MatDestroy(&b->B));
2702   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2703   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2704   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2705   MatSeqXAIJRestoreOptions_Private(b->B);
2706 
2707   MatSeqXAIJGetOptions_Private(b->A);
2708   PetscCall(MatDestroy(&b->A));
2709   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2710   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2711   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2712   MatSeqXAIJRestoreOptions_Private(b->A);
2713 
2714   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2715   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2716   B->preallocated  = PETSC_TRUE;
2717   B->was_assembled = PETSC_FALSE;
2718   B->assembled     = PETSC_FALSE;
2719   PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721 
2722 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2723 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2724 
/* Convert a MATMPIBAIJ matrix to MATMPIADJ: build the block-level adjacency structure
   (one vertex per block row, diagonal self-edges removed, columns in ascending global order). */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointers, counting diagonal + off-diagonal entries minus the self-edge */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* second pass: merge the three column ranges in ascending global order:
     off-diagonal columns left of the diagonal block, then the diagonal block
     (skipping the diagonal entry itself), then the remaining off-diagonal columns */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break; /* garray entries never fall inside [rstart,rend), so > here marks the right side */
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* skip the self-edge */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2762 
2763 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2764 
2765 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2766 
/* Convert a MATMPIBAIJ matrix to MATMPIAIJ by converting the sequential diagonal and
   off-diagonal blocks and renumbering the off-diagonal columns to global indices. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell with matching layout; its blocks are replaced below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray; /* off-diagonal block column -> global block column */
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: the converted off-diagonal block still uses
       compressed (local) column indices; map block index via garray, keep the offset
       within the block */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  /* assembly is purely local here; skip the off-process stash exchange */
  PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2824 
2825 /*MC
2826    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2827 
2828    Options Database Keys:
2829 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2830 . -mat_block_size <bs> - set the blocksize used to store the matrix
2831 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2832 - -mat_use_hash_table <fact> - set hash table factor
2833 
2834    Level: beginner
2835 
2836    Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2839 
2840 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2841 M*/
2842 
2843 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2844 
/*
   MATMPIBAIJ constructor: allocates the implementation-specific data structure,
   installs the operations table, registers the type-specific methods that generic
   Mat routines dispatch to, and processes the -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE; /* set below if -mat_use_hash_table was given */

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values; /* copy the static function table for this type */
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* created lazily when off-diagonal entries are first set */
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register conversion, store/retrieve, preallocation, and scaling methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* a factor <= 1 would undersize the hash table; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2923 
2924 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2925 /*MC
2926    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2927 
2928    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2929    and `MATMPIBAIJ` otherwise.
2930 
2931    Options Database Keys:
2932 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2933 
2934   Level: beginner
2935 
2936 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2937 M*/
2938 
2939 /*@
2940   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2941   (block compressed row).
2942 
2943   Collective
2944 
2945   Input Parameters:
2946 + B     - the matrix
2947 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2948           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2949 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2950            submatrix  (same for all local rows)
2951 . d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2953            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2954            set it even if it is zero.
2955 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2956            submatrix (same for all local rows).
2957 - o_nnz - array containing the number of nonzeros in the various block rows of the
2958            off-diagonal portion of the local submatrix (possibly different for
2959            each block row) or `NULL`.
2960 
2961    If the *_nnz parameter is given then the *_nz parameter is ignored
2962 
2963   Options Database Keys:
2964 + -mat_block_size            - size of the blocks to use
2965 - -mat_use_hash_table <fact> - set hash table factor
2966 
2967   Level: intermediate
2968 
2969   Notes:
2970   For good matrix assembly performance
2971   the user should preallocate the matrix storage by setting the parameters
2972   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2973   performance can be increased by more than a factor of 50.
2974 
2975   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
2977 
2978   Storage Information:
2979   For a square global matrix we define each processor's diagonal portion
2980   to be its local rows and the corresponding columns (a square submatrix);
2981   each processor's off-diagonal portion encompasses the remainder of the
2982   local matrix (a rectangular submatrix).
2983 
2984   The user can specify preallocated storage for the diagonal part of
2985   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2986   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2987   memory allocation.  Likewise, specify preallocated storage for the
2988   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2989 
2990   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2991   the figure below we depict these three local rows and all columns (0-11).
2992 
2993 .vb
2994            0 1 2 3 4 5 6 7 8 9 10 11
2995           --------------------------
2996    row 3  |o o o d d d o o o o  o  o
2997    row 4  |o o o d d d o o o o  o  o
2998    row 5  |o o o d d d o o o o  o  o
2999           --------------------------
3000 .ve
3001 
3002   Thus, any entries in the d locations are stored in the d (diagonal)
3003   submatrix, and any entries in the o locations are stored in the
3004   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3005   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3006 
3007   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3008   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3009   In general, for PDE problems in which most nonzeros are near the diagonal,
3010   one expects `d_nz` >> `o_nz`.
3011 
3012   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3013   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3014   You can also run with the option `-info` and look for messages with the string
3015   malloc in them to see if additional memory allocation was needed.
3016 
3017 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3018 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1); /* the matrix type must already be set so the method lookup below can succeed */
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the implementation registered under "MatMPIBAIJSetPreallocation_C"
     (a no-op for types that do not provide it) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3028 
3029 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3030 /*@
3031   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3032   (block compressed row).
3033 
3034   Collective
3035 
3036   Input Parameters:
3037 + comm  - MPI communicator
3038 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3039           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3040 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3041           This value should be the same as the local size used in creating the
3042           y vector for the matrix-vector product y = Ax.
3043 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3044           This value should be the same as the local size used in creating the
3045           x vector for the matrix-vector product y = Ax.
3046 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3047 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3048 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3049           submatrix  (same for all local rows)
3050 . d_nnz - array containing the number of nonzero blocks in the various block rows
3051           of the in diagonal portion of the local (possibly different for each block
3052           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3053           and set it even if it is zero.
3054 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3055           submatrix (same for all local rows).
3056 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3057           off-diagonal portion of the local submatrix (possibly different for
3058           each block row) or NULL.
3059 
3060   Output Parameter:
3061 . A - the matrix
3062 
3063   Options Database Keys:
3064 + -mat_block_size            - size of the blocks to use
3065 - -mat_use_hash_table <fact> - set hash table factor
3066 
3067   Level: intermediate
3068 
3069   Notes:
3070   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3071   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3072   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3073 
3074   For good matrix assembly performance
3075   the user should preallocate the matrix storage by setting the parameters
3076   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3077   performance can be increased by more than a factor of 50.
3078 
3079   If the *_nnz parameter is given then the *_nz parameter is ignored
3080 
  A nonzero block is any block that has 1 or more nonzeros in it
3082 
3083   The user MUST specify either the local or global matrix dimensions
3084   (possibly both).
3085 
3086   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3088 
3089   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3090   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3091 
3092   Storage Information:
3093   For a square global matrix we define each processor's diagonal portion
3094   to be its local rows and the corresponding columns (a square submatrix);
3095   each processor's off-diagonal portion encompasses the remainder of the
3096   local matrix (a rectangular submatrix).
3097 
3098   The user can specify preallocated storage for the diagonal part of
3099   the local submatrix with either d_nz or d_nnz (not both).  Set
3100   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3101   memory allocation.  Likewise, specify preallocated storage for the
3102   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3103 
3104   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3105   the figure below we depict these three local rows and all columns (0-11).
3106 
3107 .vb
3108            0 1 2 3 4 5 6 7 8 9 10 11
3109           --------------------------
3110    row 3  |o o o d d d o o o o  o  o
3111    row 4  |o o o d d d o o o o  o  o
3112    row 5  |o o o d d d o o o o  o  o
3113           --------------------------
3114 .ve
3115 
3116   Thus, any entries in the d locations are stored in the d (diagonal)
3117   submatrix, and any entries in the o locations are stored in the
3118   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3119   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3120 
3121   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3122   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3123   In general, for PDE problems in which most nonzeros are near the diagonal,
3124   one expects `d_nz` >> `o_nz`.
3125 
3126 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3127           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3128 @*/
MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)3129 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3130 {
3131   PetscMPIInt size;
3132 
3133   PetscFunctionBegin;
3134   PetscCall(MatCreate(comm, A));
3135   PetscCall(MatSetSizes(*A, m, n, M, N));
3136   PetscCallMPI(MPI_Comm_size(comm, &size));
3137   if (size > 1) {
3138     PetscCall(MatSetType(*A, MATMPIBAIJ));
3139     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3140   } else {
3141     PetscCall(MatSetType(*A, MATSEQBAIJ));
3142     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3143   }
3144   PetscFunctionReturn(PETSC_SUCCESS);
3145 }
3146 
/*
   Duplicate a MATMPIBAIJ matrix: copies the parallel layout, ownership tables,
   colmap/garray communication metadata, and the diagonal (A) and off-diagonal (B)
   sequential blocks; cpvalues controls whether numerical values are copied.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0; /* number of global block columns referenced by the off-diagonal part */

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  /* share (reference-count) the row and column layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* original is still in hash-based insertion mode; there is nothing to copy yet */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    /* copy the block-size bookkeeping (counts of block rows/columns, local and global) */
    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL; /* MatGetRow() work arrays are allocated lazily, not copied */
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff */
    a->ht           = NULL; /* the hash table itself is rebuilt on demand; only its configuration is copied */
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
      /* colmap: global block column -> local column in the off-diagonal part */
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* garray: local off-diagonal block column -> global block column */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* fresh block stash, but copy the ghost vector and scatter used by MatMult() */
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3225 
/*
   Used for both MPIBAIJ and MPISBAIJ matrices: reads a matrix stored in PETSc's
   AIJ binary format (header, row lengths, column indices, values), counts
   block nonzeros to preallocate, then inserts the values row by row.
*/
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already; a square matrix defaults to matching local row/column sizes */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs; /* local block rows */
  nbs = n / bs; /* local block columns */

  /* read in row lengths and build row indices (prefix sum -> CSR-style offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: local row lengths must sum to the nonzero count from the header */
  PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  {                /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count distinct diagonal and off-diagonal block columns */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) { /* SBAIJ stores only the upper triangle */
            if (col >= cs && col < ce) {
              /* in the locally-owned column range: diagonal block; count each block column once */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both are PetscTryMethod-based; only the one matching the actual matrix type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values, one scalar (non-block) row at a time */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3327 
MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)3328 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3329 {
3330   PetscBool isbinary;
3331 
3332   PetscFunctionBegin;
3333   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3334   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3335   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3336   PetscFunctionReturn(PETSC_SUCCESS);
3337 }
3338 
3339 /*@
3340   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3341 
3342   Input Parameters:
3343 + mat  - the matrix
3344 - fact - factor
3345 
3346   Options Database Key:
3347 . -mat_use_hash_table <fact> - provide the factor
3348 
3349   Level: advanced
3350 
3351 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3352 @*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatch to MatSetHashTableFactor_MPIBAIJ() when the type provides it; no-op otherwise */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3359 
MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)3360 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3361 {
3362   Mat_MPIBAIJ *baij;
3363 
3364   PetscFunctionBegin;
3365   baij          = (Mat_MPIBAIJ *)mat->data;
3366   baij->ht_fact = fact;
3367   PetscFunctionReturn(PETSC_SUCCESS);
3368 }
3369 
MatMPIBAIJGetSeqBAIJ(Mat A,Mat * Ad,Mat * Ao,const PetscInt * colmap[])3370 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3371 {
3372   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3373   PetscBool    flg;
3374 
3375   PetscFunctionBegin;
3376   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3377   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3378   if (Ad) *Ad = a->A;
3379   if (Ao) *Ao = a->B;
3380   if (colmap) *colmap = a->garray;
3381   PetscFunctionReturn(PETSC_SUCCESS);
3382 }
3383 
3384 /*
3385     Special version for direct calls from Fortran (to eliminate two function call overheads
3386 */
3387 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3388   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3389 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3390   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3391 #endif
3392 
3393 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3394 /*@C
3395   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3396 
3397   Collective
3398 
3399   Input Parameters:
3400 + matin  - the matrix
3401 . min    - number of input rows
3402 . im     - input rows
3403 . nin    - number of input columns
3404 . in     - input columns
3405 . v      - numerical values input
3406 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3407 
3408   Level: advanced
3409 
3410   Developer Notes:
3411   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3412 
3413 .seealso: `Mat`, `MatSetValuesBlocked()`
3414 @*/
matmpibaijsetvaluesblocked_(Mat * matin,PetscInt * min,const PetscInt im[],PetscInt * nin,const PetscInt in[],const MatScalar v[],InsertMode * addvin)3415 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3416 {
3417   /* convert input arguments to C version */
3418   Mat        mat = *matin;
3419   PetscInt   m = *min, n = *nin;
3420   InsertMode addv = *addvin;
3421 
3422   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3423   const MatScalar *value;
3424   MatScalar       *barray      = baij->barray;
3425   PetscBool        roworiented = baij->roworiented;
3426   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3427   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3428   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3429 
3430   PetscFunctionBegin;
3431   /* tasks normally handled by MatSetValuesBlocked() */
3432   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3433   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3434   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3435   if (mat->assembled) {
3436     mat->was_assembled = PETSC_TRUE;
3437     mat->assembled     = PETSC_FALSE;
3438   }
3439   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3440 
3441   if (!barray) {
3442     PetscCall(PetscMalloc1(bs2, &barray));
3443     baij->barray = barray;
3444   }
3445 
3446   if (roworiented) stepval = (n - 1) * bs;
3447   else stepval = (m - 1) * bs;
3448 
3449   for (i = 0; i < m; i++) {
3450     if (im[i] < 0) continue;
3451     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3452     if (im[i] >= rstart && im[i] < rend) {
3453       row = im[i] - rstart;
3454       for (j = 0; j < n; j++) {
3455         /* If NumCol = 1 then a copy is not required */
3456         if ((roworiented) && (n == 1)) {
3457           barray = (MatScalar *)v + i * bs2;
3458         } else if ((!roworiented) && (m == 1)) {
3459           barray = (MatScalar *)v + j * bs2;
3460         } else { /* Here a copy is required */
3461           if (roworiented) {
3462             value = v + i * (stepval + bs) * bs + j * bs;
3463           } else {
3464             value = v + j * (stepval + bs) * bs + i * bs;
3465           }
3466           for (ii = 0; ii < bs; ii++, value += stepval) {
3467             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3468           }
3469           barray -= bs2;
3470         }
3471 
3472         if (in[j] >= cstart && in[j] < cend) {
3473           col = in[j] - cstart;
3474           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3475         } else if (in[j] < 0) {
3476           continue;
3477         } else {
3478           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3479           if (mat->was_assembled) {
3480             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3481 
3482 #if defined(PETSC_USE_DEBUG)
3483   #if defined(PETSC_USE_CTABLE)
3484             {
3485               PetscInt data;
3486               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3487               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3488             }
3489   #else
3490             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3491   #endif
3492 #endif
3493 #if defined(PETSC_USE_CTABLE)
3494             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3495             col = (col - 1) / bs;
3496 #else
3497             col = (baij->colmap[in[j]] - 1) / bs;
3498 #endif
3499             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3500               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3501               col = in[j];
3502             }
3503           } else col = in[j];
3504           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3505         }
3506       }
3507     } else {
3508       if (!baij->donotstash) {
3509         if (roworiented) {
3510           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3511         } else {
3512           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3513         }
3514       }
3515     }
3516   }
3517 
3518   /* task normally handled by MatSetValuesBlocked() */
3519   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3520   PetscFunctionReturn(PETSC_SUCCESS);
3521 }
3522 
3523 /*@
3524   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix whose local rows are supplied in arrays using the standard block compressed sparse row (block CSR) format.
3525 
3526   Collective
3527 
3528   Input Parameters:
3529 + comm - MPI communicator
3530 . bs   - the block size, only a block size of 1 is supported
3531 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3532 . n    - This value should be the same as the local size used in creating the
3533          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3534          calculated if `N` is given) For square matrices `n` is almost always `m`.
3535 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3536 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3537 . i    - row indices; that is i[0] = 0 and i[row] = i[row-1] + the number of block entries in block row row-1 of the matrix
3538 . j    - column indices
3539 - a    - matrix values
3540 
3541   Output Parameter:
3542 . mat - the matrix
3543 
3544   Level: intermediate
3545 
3546   Notes:
3547   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3548   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3549   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3550 
3551   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3552   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3553   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3554   with column-major ordering within blocks.
3555 
3556   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3557 
3558 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3559           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3560 @*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* Validate the CSR row pointer and the local row count before creating anything */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* The input blocks are stored column-major (see the manual page: "column-major ordering
     within blocks"), so insert with column orientation, then restore the default */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3576 
/*
  MatCreateMPIMatConcatenateSeqMat_MPIBAIJ - concatenates the rows of the sequential BAIJ
  matrix `inmat` supplied by each rank of `comm` into a single parallel matrix `outmat`.

  n     - number of local columns of `outmat` (or PETSC_DECIDE to have it computed from the
          global block-column count via PetscSplitOwnershipBlock())
  scall - MAT_INITIAL_MATRIX creates and preallocates `outmat` (symbolic phase) before the
          values are inserted; MAT_REUSE_MATRIX runs only the numeric phase, re-inserting
          the current values of `inmat` into the existing `outmat`.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
    PetscInt    *dnz, *onz, mbs, Nbs, nbs;
    PetscInt    *bindx, rmax = a->rmax, j;
    PetscMPIInt  rank, size;

    /* Work in block units: mbs/Nbs/nbs are the block counts corresponding to m/N/n */
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    /* scratch array of block-column indices for one block row; rmax (taken from the
       Mat_SeqBAIJ internals) is presumably an upper bound on entries per row — the
       original code relies on this, keep the assumption */
    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    /* Count the diagonal/off-diagonal block nonzeros of each local block row of inmat
       (sampling point row i*bs of each block row, and converting point indices to block
       indices) so that outmat can be exactly preallocated */
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    /* MATBAIJ resolves to SeqBAIJ or MPIBAIJ depending on the size of comm; only the
       preallocation call matching the actual type takes effect */
    PetscCall(MatSetType(*outmat, MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every rank inserts only rows it owns, so assembly requires no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  /* Copy the local rows of inmat into outmat point-row by point-row, offset by this
     rank's (non-blocked) starting row of outmat */
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3641