xref: /petsc/src/mat/impls/sell/mpi/mpisell.c (revision 27f169480a6668db3ba90f8ef6ef68d542d113fa)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator,
12    and `MATMPISELL` otherwise.  As a result, for single process communicators,
13   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()`
19 
20   Level: beginner
21 
22 .seealso: `Mat`, `MATAIJ`, `MATBAIJ`, `MATSBAIJ`, `MatCreateSELL()`, `MatCreateSeqSELL()`, `MATSEQSELL`, `MATMPISELL`
23 M*/
24 
MatDiagonalSet_MPISELL(Mat Y,Vec D,InsertMode is)25 static PetscErrorCode MatDiagonalSet_MPISELL(Mat Y, Vec D, InsertMode is)
26 {
27   Mat_MPISELL *sell = (Mat_MPISELL *)Y->data;
28 
29   PetscFunctionBegin;
30   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
31     PetscCall(MatDiagonalSet(sell->A, D, is));
32   } else {
33     PetscCall(MatDiagonalSet_Default(Y, D, is));
34   }
35   PetscFunctionReturn(PETSC_SUCCESS);
36 }
37 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but is fast to access.
*/
/* Build sell->colmap, mapping a global column number to its local column
   number (stored +1 so that 0 can mean "not present") in the off-diagonal
   block B.  Requires sell->garray, which lists the global columns of B. */
PetscErrorCode MatCreateColmap_MPISELL_Private(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscInt     n    = sell->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(sell->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPISELL Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global column + 1 */
  PetscCall(PetscHMapICreateWithSize(n, &sell->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(sell->colmap, sell->garray[i] + 1, i + 1));
#else
  /* non-scalable variant: dense length-N array, zero-initialized ("missing") */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &sell->colmap));
  for (i = 0; i < n; i++) sell->colmap[sell->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
61 
/* Insert (or add, per addv) one value at local (row,col) of the diagonal block A
   (Mat_SeqSELL *a).  Relies on caller-scope state: cp1/vp1 point into the row's
   column/value storage (strided by sliceheight), nrow1 is the current row length,
   low1/high1 bound the search window, lastcol1 caches the last column handled,
   and `inserted`/`nonew`/`ignorezeroentries` come from the caller.  orow/ocol
   are the global indices, used only in error messages. */
#define MatSetValues_SeqSELL_A_Private(row, col, value, addv, orow, ocol) \
  { \
    /* reuse the previous search window when columns arrive in increasing order */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search down to a small window, then finish with a linear scan */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (cp1[sliceheight * t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (cp1[sliceheight * _i] > col) break; \
      if (cp1[sliceheight * _i] == col) { \
        if (addv == ADD_VALUES) vp1[sliceheight * _i] += value; \
        else vp1[sliceheight * _i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    /* new nonzero: silently dropped when it is a zero being ignored ... */ \
    if (value == 0.0 && ignorezeroentries) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* ... or when new-nonzero insertion is disabled (nonew == 1) */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXSELLReallocateSELL(A, am, 1, nrow1, a->sliidx, a->sliceheight, row / sliceheight, row, col, a->colidx, a->val, cp1, vp1, nonew, MatScalar); \
    /* shift up all the later entries in this row */ \
    for (ii = nrow1 - 1; ii >= _i; ii--) { \
      cp1[sliceheight * (ii + 1)] = cp1[sliceheight * ii]; \
      vp1[sliceheight * (ii + 1)] = vp1[sliceheight * ii]; \
    } \
    cp1[sliceheight * _i] = col; \
    vp1[sliceheight * _i] = value; \
    a->nz++; \
    nrow1++; \
  a_noinsert:; \
    a->rlen[row] = nrow1; \
  }
105 
/* Same as MatSetValues_SeqSELL_A_Private, but for the off-diagonal block B
   (Mat_SeqSELL *b) using the caller's cp2/vp2/nrow2/low2/high2/lastcol2 state. */
#define MatSetValues_SeqSELL_B_Private(row, col, value, addv, orow, ocol) \
  { \
    /* reuse the previous search window when columns arrive in increasing order */ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* binary search down to a small window, then finish with a linear scan */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (cp2[sliceheight * t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (cp2[sliceheight * _i] > col) break; \
      if (cp2[sliceheight * _i] == col) { \
        if (addv == ADD_VALUES) vp2[sliceheight * _i] += value; \
        else vp2[sliceheight * _i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    /* new nonzero: silently dropped when it is a zero being ignored ... */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* ... or when new-nonzero insertion is disabled (nonew == 1) */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXSELLReallocateSELL(B, bm, 1, nrow2, b->sliidx, b->sliceheight, row / sliceheight, row, col, b->colidx, b->val, cp2, vp2, nonew, MatScalar); \
    /* shift up all the later entries in this row */ \
    for (ii = nrow2 - 1; ii >= _i; ii--) { \
      cp2[sliceheight * (ii + 1)] = cp2[sliceheight * ii]; \
      vp2[sliceheight * (ii + 1)] = vp2[sliceheight * ii]; \
    } \
    cp2[sliceheight * _i] = col; \
    vp2[sliceheight * _i] = value; \
    b->nz++; \
    nrow2++; \
  b_noinsert:; \
    b->rlen[row] = nrow2; \
  }
149 
/* Insert or add an m-by-n dense logical block of values.  Entries in locally
   owned rows are routed to the diagonal block (sell->A) when the column is
   locally owned, otherwise to the off-diagonal block (sell->B); rows owned by
   other processes are stashed and communicated at assembly time. */
static PetscErrorCode MatSetValues_MPISELL(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscScalar  value;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, shift1, shift2;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool    roworiented = sell->roworiented;

  /* Some Variables required in the macro */
  Mat          A                 = sell->A;
  Mat_SeqSELL *a                 = (Mat_SeqSELL *)A->data;
  PetscBool    ignorezeroentries = a->ignorezeroentries, found;
  Mat          B                 = sell->B;
  Mat_SeqSELL *b                 = (Mat_SeqSELL *)B->data;
  PetscInt    *cp1, *cp2, ii, _i, nrow1, nrow2, low1, high1, low2, high2, t, lastcol1, lastcol2, sliceheight = a->sliceheight;
  MatScalar   *vp1, *vp2;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
      row      = im[i] - rstart;
      lastcol1 = -1;
      shift1   = a->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
      cp1      = PetscSafePointerPlusOffset(a->colidx, shift1);
      vp1      = PetscSafePointerPlusOffset(a->val, shift1);
      nrow1    = a->rlen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      shift2   = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
      cp2      = PetscSafePointerPlusOffset(b->colidx, shift2);
      vp2      = PetscSafePointerPlusOffset(b->val, shift2);
      nrow2    = b->rlen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) { /* locally owned column: diagonal block */
          col = in[j] - cstart;
          MatSetValue_SeqSELL_Private(A, row, col, value, addv, im[i], in[j], cp1, vp1, lastcol1, low1, high1); /* set one value */
#if defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) {
          continue; /* negative column indices are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate the global column to B's local column via the colmap */
            if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(sell->colmap, in[j] + 1, 0, &col));
            col--;
#else
            col = sell->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqSELL *)sell->B->data)->nonew) {
              /* column not in B's current nonzero structure: disassemble so B
                 can be rebuilt with global column indices */
              PetscCall(MatDisAssemble_MPISELL(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSELL_B_Private() */
              B      = sell->B;
              b      = (Mat_SeqSELL *)B->data;
              shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
              cp2    = b->colidx + shift2;
              vp2    = b->val + shift2;
              nrow2  = b->rlen[row];
              low2   = 0;
              high2  = nrow2;
              found  = PETSC_FALSE;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          MatSetValue_SeqSELL_Private(B, row, col, value, addv, im[i], in[j], cp2, vp2, lastcol2, low2, high2); /* set one value */
#if defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else { /* off-process row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!sell->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
247 
/* Retrieve entries (idxm[i], idxn[j]) into the dense array v (row-major).
   Only locally owned rows are supported; columns may be anywhere: locally
   owned columns come from the diagonal block, others are looked up in the
   off-diagonal block via the colmap (0.0 if not present in the structure). */
static PetscErrorCode MatGetValues_MPISELL(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(sell->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal column: map global to B's local column (colmap stores +1) */
        if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(sell->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = sell->colmap[idxn[j]] - 1;
#endif
        /* column absent from B's structure: the entry is a structural zero */
        if (col < 0 || sell->garray[col] != idxn[j]) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(sell->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
281 
MatAssemblyBegin_MPISELL(Mat mat,MatAssemblyType mode)282 static PetscErrorCode MatAssemblyBegin_MPISELL(Mat mat, MatAssemblyType mode)
283 {
284   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
285   PetscInt     nstash, reallocs;
286 
287   PetscFunctionBegin;
288   if (sell->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
289 
290   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
291   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
292   PetscCall(PetscInfo(sell->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
293   PetscFunctionReturn(PETSC_SUCCESS);
294 }
295 
/* Finish assembly: drain the stash into local blocks, assemble A and B,
   collectively detect disassembly, set up the scatter on first final assembly,
   and update the collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPISELL(Mat mat, MatAssemblyType mode)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, flg;
  PetscInt    *row, *col;
  PetscScalar *val;
  PetscBool    all_assembled;
  /* do not use 'b = (Mat_SeqSELL*)sell->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!sell->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n; i++) { /* assemble one by one */
        PetscCall(MatSetValues_MPISELL(mat, 1, row + i, 1, col + i, val + i, mat->insertmode));
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) sell->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(sell->A, mode));
  PetscCall(MatAssemblyEnd(sell->A, mode));

  /*
     determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqSELL *)sell->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISELL(mat));
  }
  /* first final assembly: build garray, the column scatter, and compact B */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISELL(mat));
#if defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && sell->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) sell->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(sell->B, mode));
  PetscCall(MatAssemblyEnd(sell->B, mode));
  /* cached row workspace and diagonal are stale after assembly */
  PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
  sell->rowvalues = NULL;
  PetscCall(VecDestroy(&sell->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSELL *)sell->A->data)->nonew) {
    PetscObjectState state = sell->A->nonzerostate + sell->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
355 
MatZeroEntries_MPISELL(Mat A)356 static PetscErrorCode MatZeroEntries_MPISELL(Mat A)
357 {
358   Mat_MPISELL *l = (Mat_MPISELL *)A->data;
359 
360   PetscFunctionBegin;
361   PetscCall(MatZeroEntries(l->A));
362   PetscCall(MatZeroEntries(l->B));
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
MatMult_MPISELL(Mat A,Vec xx,Vec yy)366 static PetscErrorCode MatMult_MPISELL(Mat A, Vec xx, Vec yy)
367 {
368   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
369   PetscInt     nt;
370 
371   PetscFunctionBegin;
372   PetscCall(VecGetLocalSize(xx, &nt));
373   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
374   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
375   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
376   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
377   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
MatMultDiagonalBlock_MPISELL(Mat A,Vec bb,Vec xx)381 static PetscErrorCode MatMultDiagonalBlock_MPISELL(Mat A, Vec bb, Vec xx)
382 {
383   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
384 
385   PetscFunctionBegin;
386   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
387   PetscFunctionReturn(PETSC_SUCCESS);
388 }
389 
MatMultAdd_MPISELL(Mat A,Vec xx,Vec yy,Vec zz)390 static PetscErrorCode MatMultAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
391 {
392   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
393 
394   PetscFunctionBegin;
395   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
396   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
397   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
398   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
MatMultTranspose_MPISELL(Mat A,Vec xx,Vec yy)402 static PetscErrorCode MatMultTranspose_MPISELL(Mat A, Vec xx, Vec yy)
403 {
404   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
405 
406   PetscFunctionBegin;
407   /* do nondiagonal part */
408   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
409   /* do local part */
410   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
411   /* add partial results together */
412   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
413   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416 
/* Test whether Bmat == Amat^T (within tol).  First a cheap local test on the
   diagonal blocks; if that passes (and size > 1), compare the off-diagonal
   parts by extracting complementary submatrices. */
static PetscErrorCode MatIsTranspose_MPISELL(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm     comm;
  Mat_MPISELL *Asell = (Mat_MPISELL *)Amat->data, *Bsell;
  Mat          Adia  = Asell->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS           Me, Notme;
  PetscInt     M, N, first, last, *notme, i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bsell = (Mat_MPISELL *)Bmat->data;
  Bdia  = Bsell->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, f));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside this rank's ownership range.
     NOTE(review): allocation and the IS are sized with N while the second fill
     loop is bounded by M — consistent only when M == N; presumably this routine
     assumes a square matrix with matching row/column layouts. TODO confirm. */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  /* A's (rows=Me, cols=Notme) block must be the transpose of B's (Notme, Me) block */
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
456 
MatMultTransposeAdd_MPISELL(Mat A,Vec xx,Vec yy,Vec zz)457 static PetscErrorCode MatMultTransposeAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
458 {
459   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
460 
461   PetscFunctionBegin;
462   /* do nondiagonal part */
463   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
464   /* do local part */
465   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
466   /* add partial results together */
467   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
468   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
469   PetscFunctionReturn(PETSC_SUCCESS);
470 }
471 
472 /*
473   This only works correctly for square matrices where the subblock A->A is the
474    diagonal block
475 */
MatGetDiagonal_MPISELL(Mat A,Vec v)476 static PetscErrorCode MatGetDiagonal_MPISELL(Mat A, Vec v)
477 {
478   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
479 
480   PetscFunctionBegin;
481   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
482   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
483   PetscCall(MatGetDiagonal(a->A, v));
484   PetscFunctionReturn(PETSC_SUCCESS);
485 }
486 
MatScale_MPISELL(Mat A,PetscScalar aa)487 static PetscErrorCode MatScale_MPISELL(Mat A, PetscScalar aa)
488 {
489   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
490 
491   PetscFunctionBegin;
492   PetscCall(MatScale(a->A, aa));
493   PetscCall(MatScale(a->B, aa));
494   PetscFunctionReturn(PETSC_SUCCESS);
495 }
496 
/* Release everything owned by the MPISELL implementation: the stash, both
   sequential blocks, the colmap, garray, the ghost vector and scatter, cached
   row workspace, and finally the data struct; then detach composed methods. */
PetscErrorCode MatDestroy_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&sell->diag));
  PetscCall(MatDestroy(&sell->A));
  PetscCall(MatDestroy(&sell->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&sell->colmap));
#else
  PetscCall(PetscFree(sell->colmap));
#endif
  PetscCall(PetscFree(sell->garray));
  PetscCall(VecDestroy(&sell->lvec));
  PetscCall(VecScatterDestroy(&sell->Mvctx));
  PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
  PetscCall(PetscFree(sell->ld));
  PetscCall(PetscFree(mat->data));

  /* remove the type name and all composed methods registered at creation */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISELLSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpisellcuda_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
531 
532 #include <petscdraw.h>
/* View an MPISELL matrix for ASCII, binary, draw, and socket viewers.
   Info-style ASCII formats are handled in place; otherwise the whole matrix
   is gathered onto rank 0 and viewed there. */
static PetscErrorCode MatView_MPISELL_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISELL      *sell = (Mat_MPISELL *)mat->data;
  PetscMPIInt       rank = sell->rank, size = sell->size;
  PetscBool         isdraw, isascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics, printed synchronized across the communicator */
      MatInfo   info;
      PetscInt *inodes;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(sell->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
                                                     (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
                                                     (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      }
      PetscCall(MatGetInfo(sell->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(sell->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(sell->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(sell->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)sell->A, ((PetscObject)mat)->name));
      PetscCall(MatView(sell->A, viewer));
    } else {
      /* PetscCall(MatView_MPISELL_Binary(mat,viewer)); */
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSELL *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *acolidx, row, col, i, j;
    MatScalar   *aval;
    PetscBool    isnonzero;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* This is just a temporary matrix, so explicitly using MATMPISELL is probably best */
    PetscCall(MatSetType(A, MATMPISELL));
    PetscCall(MatMPISELLSetPreallocation(A, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part: walk the sliced-ELL storage slice by slice,
       decoding (row, column) from the slice index arithmetic */
    Aloc    = (Mat_SeqSELL *)sell->A->data;
    acolidx = Aloc->colidx;
    aval    = Aloc->val;
    for (i = 0; i < Aloc->totalslices; i++) { /* loop over slices */
      for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
        /* a slot is a real nonzero only if its position is within the row's length */
        isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
        if (isnonzero) { /* check the mask bit */
          row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
          col = *acolidx + mat->rmap->rstart; /* diagonal-block columns are offset by rstart */
          PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
        }
        aval++;
        acolidx++;
      }
    }

    /* copy over the B part; garray maps its local columns back to global */
    Aloc    = (Mat_SeqSELL *)sell->B->data;
    acolidx = Aloc->colidx;
    aval    = Aloc->val;
    for (i = 0; i < Aloc->totalslices; i++) {
      for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
        isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
        if (isnonzero) {
          row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
          col = sell->garray[*acolidx];
          PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
        }
        aval++;
        acolidx++;
      }
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISELL *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqSELL(((Mat_MPISELL *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
668 
MatView_MPISELL(Mat mat,PetscViewer viewer)669 static PetscErrorCode MatView_MPISELL(Mat mat, PetscViewer viewer)
670 {
671   PetscBool isascii, isdraw, issocket, isbinary;
672 
673   PetscFunctionBegin;
674   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
675   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
676   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
677   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
678   if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPISELL_ASCIIorDraworSocket(mat, viewer));
679   PetscFunctionReturn(PETSC_SUCCESS);
680 }
681 
MatGetGhosts_MPISELL(Mat mat,PetscInt * nghosts,const PetscInt * ghosts[])682 static PetscErrorCode MatGetGhosts_MPISELL(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
683 {
684   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
685 
686   PetscFunctionBegin;
687   PetscCall(MatGetSize(sell->B, NULL, nghosts));
688   if (ghosts) *ghosts = sell->garray;
689   PetscFunctionReturn(PETSC_SUCCESS);
690 }
691 
MatGetInfo_MPISELL(Mat matin,MatInfoType flag,MatInfo * info)692 static PetscErrorCode MatGetInfo_MPISELL(Mat matin, MatInfoType flag, MatInfo *info)
693 {
694   Mat_MPISELL   *mat = (Mat_MPISELL *)matin->data;
695   Mat            A = mat->A, B = mat->B;
696   PetscLogDouble isend[5], irecv[5];
697 
698   PetscFunctionBegin;
699   info->block_size = 1.0;
700   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
701 
702   isend[0] = info->nz_used;
703   isend[1] = info->nz_allocated;
704   isend[2] = info->nz_unneeded;
705   isend[3] = info->memory;
706   isend[4] = info->mallocs;
707 
708   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
709 
710   isend[0] += info->nz_used;
711   isend[1] += info->nz_allocated;
712   isend[2] += info->nz_unneeded;
713   isend[3] += info->memory;
714   isend[4] += info->mallocs;
715   if (flag == MAT_LOCAL) {
716     info->nz_used      = isend[0];
717     info->nz_allocated = isend[1];
718     info->nz_unneeded  = isend[2];
719     info->memory       = isend[3];
720     info->mallocs      = isend[4];
721   } else if (flag == MAT_GLOBAL_MAX) {
722     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
723 
724     info->nz_used      = irecv[0];
725     info->nz_allocated = irecv[1];
726     info->nz_unneeded  = irecv[2];
727     info->memory       = irecv[3];
728     info->mallocs      = irecv[4];
729   } else if (flag == MAT_GLOBAL_SUM) {
730     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
731 
732     info->nz_used      = irecv[0];
733     info->nz_allocated = irecv[1];
734     info->nz_unneeded  = irecv[2];
735     info->memory       = irecv[3];
736     info->mallocs      = irecv[4];
737   }
738   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
739   info->fill_ratio_needed = 0;
740   info->factor_mallocs    = 0;
741   PetscFunctionReturn(PETSC_SUCCESS);
742 }
743 
MatSetOption_MPISELL(Mat A,MatOption op,PetscBool flg)744 static PetscErrorCode MatSetOption_MPISELL(Mat A, MatOption op, PetscBool flg)
745 {
746   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
747 
748   PetscFunctionBegin;
749   switch (op) {
750   case MAT_NEW_NONZERO_LOCATIONS:
751   case MAT_NEW_NONZERO_ALLOCATION_ERR:
752   case MAT_UNUSED_NONZERO_LOCATION_ERR:
753   case MAT_KEEP_NONZERO_PATTERN:
754   case MAT_NEW_NONZERO_LOCATION_ERR:
755   case MAT_USE_INODES:
756   case MAT_IGNORE_ZERO_ENTRIES:
757     MatCheckPreallocated(A, 1);
758     PetscCall(MatSetOption(a->A, op, flg));
759     PetscCall(MatSetOption(a->B, op, flg));
760     break;
761   case MAT_ROW_ORIENTED:
762     MatCheckPreallocated(A, 1);
763     a->roworiented = flg;
764 
765     PetscCall(MatSetOption(a->A, op, flg));
766     PetscCall(MatSetOption(a->B, op, flg));
767     break;
768   case MAT_IGNORE_OFF_PROC_ENTRIES:
769     a->donotstash = flg;
770     break;
771   case MAT_SYMMETRIC:
772     MatCheckPreallocated(A, 1);
773     PetscCall(MatSetOption(a->A, op, flg));
774     break;
775   case MAT_STRUCTURALLY_SYMMETRIC:
776     MatCheckPreallocated(A, 1);
777     PetscCall(MatSetOption(a->A, op, flg));
778     break;
779   case MAT_HERMITIAN:
780     MatCheckPreallocated(A, 1);
781     PetscCall(MatSetOption(a->A, op, flg));
782     break;
783   case MAT_SYMMETRY_ETERNAL:
784     MatCheckPreallocated(A, 1);
785     PetscCall(MatSetOption(a->A, op, flg));
786     break;
787   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
788     MatCheckPreallocated(A, 1);
789     PetscCall(MatSetOption(a->A, op, flg));
790     break;
791   default:
792     break;
793   }
794   PetscFunctionReturn(PETSC_SUCCESS);
795 }
796 
MatDiagonalScale_MPISELL(Mat mat,Vec ll,Vec rr)797 static PetscErrorCode MatDiagonalScale_MPISELL(Mat mat, Vec ll, Vec rr)
798 {
799   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
800   Mat          a = sell->A, b = sell->B;
801   PetscInt     s1, s2, s3;
802 
803   PetscFunctionBegin;
804   PetscCall(MatGetLocalSize(mat, &s2, &s3));
805   if (rr) {
806     PetscCall(VecGetLocalSize(rr, &s1));
807     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
808     /* Overlap communication with computation. */
809     PetscCall(VecScatterBegin(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
810   }
811   if (ll) {
812     PetscCall(VecGetLocalSize(ll, &s1));
813     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
814     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
815   }
816   /* scale  the diagonal block */
817   PetscUseTypeMethod(a, diagonalscale, ll, rr);
818 
819   if (rr) {
820     /* Do a scatter end and then right scale the off-diagonal block */
821     PetscCall(VecScatterEnd(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
822     PetscUseTypeMethod(b, diagonalscale, NULL, sell->lvec);
823   }
824   PetscFunctionReturn(PETSC_SUCCESS);
825 }
826 
MatSetUnfactored_MPISELL(Mat A)827 static PetscErrorCode MatSetUnfactored_MPISELL(Mat A)
828 {
829   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
830 
831   PetscFunctionBegin;
832   PetscCall(MatSetUnfactored(a->A));
833   PetscFunctionReturn(PETSC_SUCCESS);
834 }
835 
MatEqual_MPISELL(Mat A,Mat B,PetscBool * flag)836 static PetscErrorCode MatEqual_MPISELL(Mat A, Mat B, PetscBool *flag)
837 {
838   Mat_MPISELL *matB = (Mat_MPISELL *)B->data, *matA = (Mat_MPISELL *)A->data;
839   Mat          a, b, c, d;
840   PetscBool    flg;
841 
842   PetscFunctionBegin;
843   a = matA->A;
844   b = matA->B;
845   c = matB->A;
846   d = matB->B;
847 
848   PetscCall(MatEqual(a, c, &flg));
849   if (flg) PetscCall(MatEqual(b, d, &flg));
850   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
851   PetscFunctionReturn(PETSC_SUCCESS);
852 }
853 
MatCopy_MPISELL(Mat A,Mat B,MatStructure str)854 static PetscErrorCode MatCopy_MPISELL(Mat A, Mat B, MatStructure str)
855 {
856   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
857   Mat_MPISELL *b = (Mat_MPISELL *)B->data;
858 
859   PetscFunctionBegin;
860   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
861   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
862     /* because of the column compression in the off-processor part of the matrix a->B,
863        the number of columns in a->B and b->B may be different, hence we cannot call
864        the MatCopy() directly on the two parts. If need be, we can provide a more
865        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
866        then copying the submatrices */
867     PetscCall(MatCopy_Basic(A, B, str));
868   } else {
869     PetscCall(MatCopy(a->A, b->A, str));
870     PetscCall(MatCopy(a->B, b->B, str));
871   }
872   PetscFunctionReturn(PETSC_SUCCESS);
873 }
874 
/* Default setup: preallocate both blocks with PETSC_DEFAULT row lengths. */
static PetscErrorCode MatSetUp_MPISELL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPISELLSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
881 
/* Complex-conjugate every stored entry by conjugating both sequential blocks in place. */
static PetscErrorCode MatConjugate_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate_SeqSELL(sell->A));
  PetscCall(MatConjugate_SeqSELL(sell->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
891 
/*
  Invert the block diagonal by delegating to the sequential diagonal block,
  and propagate any factorization error state up to the parallel matrix.
*/
static PetscErrorCode MatInvertBlockDiagonal_MPISELL(Mat A, const PetscScalar **values)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  /* e.g. a zero pivot in the diagonal block must be visible on the parallel matrix */
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
901 
MatSetRandom_MPISELL(Mat x,PetscRandom rctx)902 static PetscErrorCode MatSetRandom_MPISELL(Mat x, PetscRandom rctx)
903 {
904   Mat_MPISELL *sell = (Mat_MPISELL *)x->data;
905 
906   PetscFunctionBegin;
907   PetscCall(MatSetRandom(sell->A, rctx));
908   PetscCall(MatSetRandom(sell->B, rctx));
909   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
910   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
/*
  Process runtime options for MATMPISELL. Currently there are no type-specific
  options; the head/end pair is kept so the options group exists for future use.
*/
static PetscErrorCode MatSetFromOptions_MPISELL(Mat A, PetscOptionItems PetscOptionsObject)
{
  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPISELL options");
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
921 
/*
  Compute Y = Y + a*I. If Y was never preallocated, give each local row of
  both blocks room for one entry (enough for the diagonal); if the diagonal
  block is preallocated but currently holds no nonzeros, re-preallocate it so
  the diagonal entries can be inserted. The shift itself is delegated to
  MatShift_Basic().
*/
static PetscErrorCode MatShift_MPISELL(Mat Y, PetscScalar a)
{
  Mat_MPISELL *msell = (Mat_MPISELL *)Y->data;
  Mat_SeqSELL *sell  = (Mat_SeqSELL *)msell->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPISELLSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!sell->nz) {
    PetscInt nonew = sell->nonew; /* save/restore: preallocation would otherwise clobber this flag */
    PetscCall(MatSeqSELLSetPreallocation(msell->A, 1, NULL));
    sell->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
938 
/* Return the sequential diagonal block (borrowed reference; no copy is made). */
static PetscErrorCode MatGetDiagonalBlock_MPISELL(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISELL *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}
945 
/* Stash a copy of the numerical values of both blocks; restore them later with MatRetrieveValues(). */
static PetscErrorCode MatStoreValues_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(sell->A));
  PetscCall(MatStoreValues(sell->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
955 
/* Restore the numerical values of both blocks previously saved by MatStoreValues(). */
static PetscErrorCode MatRetrieveValues_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(sell->A));
  PetscCall(MatRetrieveValues(sell->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
965 
/*
  Implementation of MatMPISELLSetPreallocation() for MATMPISELL: on first use,
  create the two sequential SELL blocks (diagonal A and off-diagonal B), then
  preallocate both with the given per-row lengths and mark the matrix as
  needing assembly.
*/
static PetscErrorCode MatMPISELLSetPreallocation_MPISELL(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *b;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPISELL *)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQSELL matrices. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    /* diagonal block: local rows x local columns */
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQSELL));
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    /* off-diagonal block: local rows x ALL global columns; its columns are
       compressed later (see the column-compression note in MatCopy_MPISELL) */
    PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
    PetscCall(MatSetType(b->B, MATSEQSELL));
  }

  PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  /*
    critical for MatAssemblyEnd to work.
    MatAssemblyBegin checks it to set up was_assembled
    and MatAssemblyEnd checks was_assembled to determine whether to build garray
  */
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
999 
/*
  Duplicate an MPISELL matrix: replicate the parallel layout, the
  communication structures (colmap, garray, lvec, Mvctx), and both sequential
  blocks. Whether numerical values are copied is controlled by cpvalues,
  which is forwarded to the block-level MatDuplicate() calls.
*/
static PetscErrorCode MatDuplicate_MPISELL(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPISELL *a, *oldmat = (Mat_MPISELL *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPISELL *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-matrix row-access scratch space is not shared with the original;
     presumably (re)built lazily on first use -- see getrowactive */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
    /* colmap: global-to-local column mapping for the off-diagonal part (see
       the note at the top of this file); stored as hash table or flat array */
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    /* garray: global indices of the ghost columns of B (one per column of B) */
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1052 
/*
  Virtual function table for MATMPISELL. Entries are positional: the slot
  numbers in the interspersed comments index into struct _MatOps, so entries
  must NOT be reordered; NULL means the operation is unsupported for this type.
*/
static const struct _MatOps MatOps_Values = {MatSetValues_MPISELL,
                                             NULL,
                                             NULL,
                                             MatMult_MPISELL,
                                             /* 4*/ MatMultAdd_MPISELL,
                                             MatMultTranspose_MPISELL,
                                             MatMultTransposeAdd_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*10*/ NULL,
                                             NULL,
                                             NULL,
                                             MatSOR_MPISELL,
                                             NULL,
                                             /*15*/ MatGetInfo_MPISELL,
                                             MatEqual_MPISELL,
                                             MatGetDiagonal_MPISELL,
                                             MatDiagonalScale_MPISELL,
                                             NULL,
                                             /*20*/ MatAssemblyBegin_MPISELL,
                                             MatAssemblyEnd_MPISELL,
                                             MatSetOption_MPISELL,
                                             MatZeroEntries_MPISELL,
                                             /*24*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*29*/ MatSetUp_MPISELL,
                                             NULL,
                                             NULL,
                                             MatGetDiagonalBlock_MPISELL,
                                             NULL,
                                             /*34*/ MatDuplicate_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*39*/ NULL,
                                             NULL,
                                             NULL,
                                             MatGetValues_MPISELL,
                                             MatCopy_MPISELL,
                                             /*44*/ NULL,
                                             MatScale_MPISELL,
                                             MatShift_MPISELL,
                                             MatDiagonalSet_MPISELL,
                                             NULL,
                                             /*49*/ MatSetRandom_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*54*/ MatFDColoringCreate_MPIXAIJ,
                                             NULL,
                                             MatSetUnfactored_MPISELL,
                                             NULL,
                                             NULL,
                                             /*59*/ NULL,
                                             MatDestroy_MPISELL,
                                             MatView_MPISELL,
                                             NULL,
                                             NULL,
                                             /*64*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*69*/ NULL,
                                             NULL,
                                             NULL,
                                             MatFDColoringApply_AIJ, /* reuse AIJ function */
                                             MatSetFromOptions_MPISELL,
                                             NULL,
                                             /*75*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*80*/ NULL,
                                             NULL,
                                             NULL,
                                             /*83*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*89*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             MatConjugate_MPISELL,
                                             /*94*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*99*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*104*/ NULL,
                                             NULL,
                                             MatGetGhosts_MPISELL,
                                             NULL,
                                             NULL,
                                             /*109*/ MatMultDiagonalBlock_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*114*/ NULL,
                                             NULL,
                                             MatInvertBlockDiagonal_MPISELL,
                                             NULL,
                                             /*119*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*124*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*129*/ MatFDColoringSetUp_MPIXAIJ,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*134*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*139*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL};
1197 
1198 /*@C
1199   MatMPISELLSetPreallocation - Preallocates memory for a `MATMPISELL` sparse parallel matrix in sell format.
1200   For good matrix assembly performance the user should preallocate the matrix storage by
1201   setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
1202 
1203   Collective
1204 
1205   Input Parameters:
1206 + B     - the matrix
1207 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1208            (same value is used for all local rows)
1209 . d_nnz - array containing the number of nonzeros in the various rows of the
1210            DIAGONAL portion of the local submatrix (possibly different for each row)
1211            or NULL (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
1212            The size of this array is equal to the number of local rows, i.e 'm'.
1213            For matrices that will be factored, you must leave room for (and set)
1214            the diagonal entry even if it is zero.
1215 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1216            submatrix (same value is used for all local rows).
1217 - o_nnz - array containing the number of nonzeros in the various rows of the
1218            OFF-DIAGONAL portion of the local submatrix (possibly different for
1219            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
1220            structure. The size of this array is equal to the number
1221            of local rows, i.e 'm'.
1222 
1223   Example usage:
1224   Consider the following 8x8 matrix with 34 non-zero values, that is
1225   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1226   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1227   as follows
1228 
1229 .vb
1230             1  2  0  |  0  3  0  |  0  4
1231     Proc0   0  5  6  |  7  0  0  |  8  0
1232             9  0 10  | 11  0  0  | 12  0
1233     -------------------------------------
1234            13  0 14  | 15 16 17  |  0  0
1235     Proc1   0 18  0  | 19 20 21  |  0  0
1236             0  0  0  | 22 23  0  | 24  0
1237     -------------------------------------
1238     Proc2  25 26 27  |  0  0 28  | 29  0
1239            30  0  0  | 31 32 33  |  0 34
1240 .ve
1241 
1242   This can be represented as a collection of submatrices as
1243 
1244 .vb
1245       A B C
1246       D E F
1247       G H I
1248 .ve
1249 
1250   Where the submatrices A,B,C are owned by proc0, D,E,F are
1251   owned by proc1, G,H,I are owned by proc2.
1252 
1253   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1254   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1255   The 'M','N' parameters are 8,8, and have the same values on all procs.
1256 
1257   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1258   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1259   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1260   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1261   part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1262   matrix, and [DF] as another SeqSELL matrix.
1263 
1264   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
1265   allocated for every row of the local DIAGONAL submatrix, and o_nz
1266   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1267   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
1268   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1269   In this case, the values of d_nz,o_nz are
1270 .vb
1271      proc0  dnz = 2, o_nz = 2
1272      proc1  dnz = 3, o_nz = 2
1273      proc2  dnz = 1, o_nz = 4
1274 .ve
1275   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1276   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
1278   34 values.
1279 
1280   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
1281   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1282   In the above case the values for d_nnz,o_nnz are
1283 .vb
1284      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
1285      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
1286      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
1287 .ve
1288   Here the space allocated is according to nz (or maximum values in the nnz
1289   if nnz is provided) for DIAGONAL and OFF-DIAGONAL submatrices, i.e (2+2+3+2)*3+(1+4)*2=37
1290 
1291   Level: intermediate
1292 
1293   Notes:
1294   If the *_nnz parameter is given then the *_nz parameter is ignored
1295 
1296   The stored row and column indices begin with zero.
1297 
1298   The parallel matrix is partitioned such that the first m0 rows belong to
1299   process 0, the next m1 rows belong to process 1, the next m2 rows belong
1300   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1301 
1302   The DIAGONAL portion of the local submatrix of a processor can be defined
1303   as the submatrix which is obtained by extraction the part corresponding to
1304   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
1305   first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
1307   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
1308   common case of a square matrix, the row and column ranges are the same and
1309   the DIAGONAL part is also square. The remaining portion of the local
1310   submatrix (mxN) constitute the OFF-DIAGONAL portion.
1311 
1312   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
1313 
1314   You can call `MatGetInfo()` to get information on how effective the preallocation was;
1315   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1316   You can also run with the option -info and look for messages with the string
1317   malloc in them to see if additional memory allocation was needed.
1318 
1319 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatCreateSELL()`,
1320           `MATMPISELL`, `MatGetInfo()`, `PetscSplitOwnership()`, `MATSELL`
1321 @*/
PetscErrorCode MatMPISELLSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation (no-op for non-SELL types) */
  PetscTryMethod(B, "MatMPISELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1330 
1331 /*MC
1332    MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for MPI sparse matrices,
1333    based on the sliced Ellpack format
1334 
1335    Options Database Key:
. -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`
1337 
1338    Level: beginner
1339 
1340 .seealso: `Mat`, `MatCreateSELL()`, `MATSEQSELL`, `MATSELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ`
1341 M*/
1342 
1343 /*@C
1344   MatCreateSELL - Creates a sparse parallel matrix in `MATSELL` format.
1345 
1346   Collective
1347 
1348   Input Parameters:
1349 + comm      - MPI communicator
1350 . m         - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
1351               This value should be the same as the local size used in creating the
1352               y vector for the matrix-vector product y = Ax.
1353 . n         - This value should be the same as the local size used in creating the
1354               x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
1355               calculated if `N` is given) For square matrices n is almost always `m`.
1356 . M         - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1357 . N         - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1358 . d_rlenmax - max number of nonzeros per row in DIAGONAL portion of local submatrix
1359              (same value is used for all local rows)
1360 . d_rlen    - array containing the number of nonzeros in the various rows of the
1361               DIAGONAL portion of the local submatrix (possibly different for each row)
1362               or `NULL`, if d_rlenmax is used to specify the nonzero structure.
1363               The size of this array is equal to the number of local rows, i.e `m`.
1364 . o_rlenmax - max number of nonzeros per row in the OFF-DIAGONAL portion of local
1365               submatrix (same value is used for all local rows).
1366 - o_rlen    - array containing the number of nonzeros in the various rows of the
1367               OFF-DIAGONAL portion of the local submatrix (possibly different for
1368               each row) or `NULL`, if `o_rlenmax` is used to specify the nonzero
1369               structure. The size of this array is equal to the number
1370               of local rows, i.e `m`.
1371 
1372   Output Parameter:
1373 . A - the matrix
1374 
1375   Options Database Key:
1376 . -mat_sell_oneindex - Internally use indexing starting at 1
1377         rather than 0.  When calling `MatSetValues()`,
1378         the user still MUST index entries starting at 0!
1379 
1380   Example:
1381   Consider the following 8x8 matrix with 34 non-zero values, that is
1382   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1383   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1384   as follows
1385 
1386 .vb
1387             1  2  0  |  0  3  0  |  0  4
1388     Proc0   0  5  6  |  7  0  0  |  8  0
1389             9  0 10  | 11  0  0  | 12  0
1390     -------------------------------------
1391            13  0 14  | 15 16 17  |  0  0
1392     Proc1   0 18  0  | 19 20 21  |  0  0
1393             0  0  0  | 22 23  0  | 24  0
1394     -------------------------------------
1395     Proc2  25 26 27  |  0  0 28  | 29  0
1396            30  0  0  | 31 32 33  |  0 34
1397 .ve
1398 
1399   This can be represented as a collection of submatrices as
1400 .vb
1401       A B C
1402       D E F
1403       G H I
1404 .ve
1405 
1406   Where the submatrices A,B,C are owned by proc0, D,E,F are
1407   owned by proc1, G,H,I are owned by proc2.
1408 
1409   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1410   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1411   The 'M','N' parameters are 8,8, and have the same values on all procs.
1412 
1413   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1414   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1415   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1416   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1417   part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1418   matrix, and [DF] as another `MATSEQSELL` matrix.
1419 
1420   When d_rlenmax, o_rlenmax parameters are specified, d_rlenmax storage elements are
1421   allocated for every row of the local DIAGONAL submatrix, and o_rlenmax
1422   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1423   One way to choose `d_rlenmax` and `o_rlenmax` is to use the maximum number of nonzeros over
1424   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1425   In this case, the values of d_rlenmax,o_rlenmax are
1426 .vb
1427      proc0 - d_rlenmax = 2, o_rlenmax = 2
1428      proc1 - d_rlenmax = 3, o_rlenmax = 2
1429      proc2 - d_rlenmax = 1, o_rlenmax = 4
1430 .ve
1431   We are allocating m*(d_rlenmax+o_rlenmax) storage locations for every proc. This
1432   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e. we are using 12+15+10=37 storage locations to store
1434   34 values.
1435 
1436   When `d_rlen`, `o_rlen` parameters are specified, the storage is specified
1437   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_rlen`, `o_rlen` are
.vb
     proc0 - d_rlen = [2,2,2] and o_rlen = [2,2,2]
     proc1 - d_rlen = [3,3,2] and o_rlen = [2,1,1]
     proc2 - d_rlen = [1,1]   and o_rlen = [4,4]
1443 .ve
1444   Here the space allocated is still 37 though there are 34 nonzeros because
1445   the allocation is always done according to rlenmax.
1446 
1447   Level: intermediate
1448 
1449   Notes:
1450   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1451   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1452   [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`]
1453 
1454   If the *_rlen parameter is given then the *_rlenmax parameter is ignored
1455 
1456   `m`, `n`, `M`, `N` parameters specify the size of the matrix, and its partitioning across
1457   processors, while `d_rlenmax`, `d_rlen`, `o_rlenmax` , `o_rlen` parameters specify the approximate
1458   storage requirements for this matrix.
1459 
1460   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
1461   processor than it must be used on all processors that share the object for
1462   that argument.
1463 
1464   The user MUST specify either the local or global matrix dimensions
1465   (possibly both).
1466 
1467   The parallel matrix is partitioned across processors such that the
1468   first m0 rows belong to process 0, the next m1 rows belong to
1469   process 1, the next m2 rows belong to process 2 etc.. where
1470   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
1471   values corresponding to [`m` x `N`] submatrix.
1472 
1473   The columns are logically partitioned with the n0 columns belonging
1474   to 0th partition, the next n1 columns belonging to the next
1475   partition etc.. where n0,n1,n2... are the input parameter `n`.
1476 
1477   The DIAGONAL portion of the local submatrix on any given processor
1478   is the submatrix corresponding to the rows and columns `m`, `n`
1479   corresponding to the given processor. i.e diagonal matrix on
1480   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
1481   etc. The remaining portion of the local submatrix [m x (N-n)]
1482   constitute the OFF-DIAGONAL portion. The example below better
1483   illustrates this concept.
1484 
1485   For a square global matrix we define each processor's diagonal portion
1486   to be its local rows and the corresponding columns (a square submatrix);
1487   each processor's off-diagonal portion encompasses the remainder of the
1488   local matrix (a rectangular submatrix).
1489 
1490   If `o_rlen`, `d_rlen` are specified, then `o_rlenmax`, and `d_rlenmax` are ignored.
1491 
1492   When calling this routine with a single process communicator, a matrix of
1493   type `MATSEQSELL` is returned.  If a matrix of type `MATMPISELL` is desired for this
1494   type of communicator, use the construction mechanism
1495 .vb
1496    MatCreate(...,&A);
1497    MatSetType(A,MATMPISELL);
1498    MatSetSizes(A, m,n,M,N);
1499    MatMPISELLSetPreallocation(A,...);
1500 .ve
1501 
1502 .seealso: `Mat`, `MATSELL`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatMPISELLSetPreallocation()`, `MATMPISELL`
1503 @*/
MatCreateSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_rlenmax,const PetscInt d_rlen[],PetscInt o_rlenmax,const PetscInt o_rlen[],Mat * A)1504 PetscErrorCode MatCreateSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[], Mat *A)
1505 {
1506   PetscMPIInt size;
1507 
1508   PetscFunctionBegin;
1509   PetscCall(MatCreate(comm, A));
1510   PetscCall(MatSetSizes(*A, m, n, M, N));
1511   PetscCallMPI(MPI_Comm_size(comm, &size));
1512   if (size > 1) {
1513     PetscCall(MatSetType(*A, MATMPISELL));
1514     PetscCall(MatMPISELLSetPreallocation(*A, d_rlenmax, d_rlen, o_rlenmax, o_rlen));
1515   } else {
1516     PetscCall(MatSetType(*A, MATSEQSELL));
1517     PetscCall(MatSeqSELLSetPreallocation(*A, d_rlenmax, d_rlen));
1518   }
1519   PetscFunctionReturn(PETSC_SUCCESS);
1520 }
1521 
1522 /*@C
1523   MatMPISELLGetSeqSELL - Returns the local pieces of this distributed matrix
1524 
1525   Not Collective
1526 
1527   Input Parameter:
1528 . A - the `MATMPISELL` matrix
1529 
1530   Output Parameters:
1531 + Ad     - The diagonal portion of `A`
1532 . Ao     - The off-diagonal portion of `A`
1533 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
1534 
1535   Level: advanced
1536 
1537 .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`
1538 @*/
MatMPISELLGetSeqSELL(Mat A,Mat * Ad,Mat * Ao,const PetscInt * colmap[])1539 PetscErrorCode MatMPISELLGetSeqSELL(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
1540 {
1541   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1542   PetscBool    flg;
1543 
1544   PetscFunctionBegin;
1545   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &flg));
1546   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPISELL matrix as input");
1547   if (Ad) *Ad = a->A;
1548   if (Ao) *Ao = a->B;
1549   if (colmap) *colmap = a->garray;
1550   PetscFunctionReturn(PETSC_SUCCESS);
1551 }
1552 
1553 /*@C
1554   MatMPISELLGetLocalMatCondensed - Creates a `MATSEQSELL` matrix from an `MATMPISELL` matrix by
1555   taking all its local rows and NON-ZERO columns
1556 
1557   Not Collective
1558 
1559   Input Parameters:
1560 + A     - the matrix
1561 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
1562 . row   - index sets of rows to extract (or `NULL`)
1563 - col   - index sets of columns to extract (or `NULL`)
1564 
1565   Output Parameter:
1566 . A_loc - the local sequential matrix generated
1567 
1568   Level: advanced
1569 
1570 .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`, `MatGetOwnershipRange()`, `MatMPISELLGetLocalMat()`
1571 @*/
PetscErrorCode MatMPISELLGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  PetscInt     i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS           isrowa, iscola;
  Mat         *aloc;
  PetscBool    match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPISELL matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  /* Default row IS: all locally owned rows of A */
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  /* Default column IS: the locally owned columns plus every nonzero off-diagonal column,
     merged in ascending global order.  garray maps local columns of the off-diagonal block
     to global column numbers; the early break assumes garray is sorted ascending — the
     off-diagonal columns below the local range come first, then the local range, then the rest. */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal column at or beyond the local range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* idx ownership transfers to the IS (PETSC_OWN_POINTER) */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  /* MatCreateSubMatrices expects an array of matrices; for reuse we must wrap the existing
     matrix in a one-element array, for MAT_INITIAL_MATRIX it allocates the array itself */
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* only destroy the index sets we created ourselves */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1621 
1622 #include <../src/mat/impls/aij/mpi/mpiaij.h>
1623 
/* Convert a MATMPISELL matrix to MATMPIAIJ by converting its diagonal and off-diagonal
   MATSEQSELL blocks individually.  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and
   MAT_INPLACE_MATRIX semantics. */
PetscErrorCode MatConvert_MPISELL_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  Mat          B;
  Mat_MPIAIJ  *b;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* Build an empty MPIAIJ shell of matching sizes; the zero preallocations are
       placeholders since the inner blocks are replaced wholesale below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing inner AIJ blocks of B */
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* NOTE(review): MatDisAssemble_MPISELL mutates A before its blocks are converted
       (presumably restoring global column numbering in the off-diagonal block); A is
       reassembled below to undo this — confirm against MatDisAssemble_MPISELL */
    PetscCall(MatDisAssemble_MPISELL(A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place conversion: A takes over B's contents and B is destroyed */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1667 
/* Convert a MATMPIAIJ matrix to MATMPISELL by converting its diagonal and off-diagonal
   MATSEQAIJ blocks individually.  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and
   MAT_INPLACE_MATRIX semantics. */
PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ *)A->data;
  Mat          B;
  Mat_MPISELL *b;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)a->A->data, *Ba = (Mat_SeqAIJ *)a->B->data;
    PetscInt    i, d_nz = 0, o_nz = 0, m = A->rmap->N, n = A->cmap->N, lm = A->rmap->n, ln = A->cmap->n;
    PetscInt   *d_nnz, *o_nnz;
    /* Derive per-row nonzero counts from the CSR row pointers of the two inner AIJ
       blocks, tracking the maxima (d_nz/o_nz) needed by the SELL preallocation */
    PetscCall(PetscMalloc2(lm, &d_nnz, lm, &o_nnz));
    for (i = 0; i < lm; i++) {
      d_nnz[i] = Aa->i[i + 1] - Aa->i[i];
      o_nnz[i] = Ba->i[i + 1] - Ba->i[i];
      if (d_nnz[i] > d_nz) d_nz = d_nnz[i];
      if (o_nnz[i] > o_nz) o_nz = o_nnz[i];
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPISELL));
    PetscCall(MatSetSizes(B, lm, ln, m, n));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    /* both calls so the code works for size-1 and size>1 communicators */
    PetscCall(MatSeqSELLSetPreallocation(B, d_nz, d_nnz));
    PetscCall(MatMPISELLSetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }
  b = (Mat_MPISELL *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing inner SELL blocks of B */
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  }

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place conversion: A takes over B's contents and B is destroyed */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1721 
MatSOR_MPISELL(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)1722 PetscErrorCode MatSOR_MPISELL(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1723 {
1724   Mat_MPISELL *mat = (Mat_MPISELL *)matin->data;
1725   Vec          bb1 = NULL;
1726 
1727   PetscFunctionBegin;
1728   if (flag == SOR_APPLY_UPPER) {
1729     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1730     PetscFunctionReturn(PETSC_SUCCESS);
1731   }
1732 
1733   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1734 
1735   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1736     if (flag & SOR_ZERO_INITIAL_GUESS) {
1737       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1738       its--;
1739     }
1740 
1741     while (its--) {
1742       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1743       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1744 
1745       /* update rhs: bb1 = bb - B*x */
1746       PetscCall(VecScale(mat->lvec, -1.0));
1747       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1748 
1749       /* local sweep */
1750       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1751     }
1752   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1753     if (flag & SOR_ZERO_INITIAL_GUESS) {
1754       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1755       its--;
1756     }
1757     while (its--) {
1758       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1759       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1760 
1761       /* update rhs: bb1 = bb - B*x */
1762       PetscCall(VecScale(mat->lvec, -1.0));
1763       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1764 
1765       /* local sweep */
1766       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1767     }
1768   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1769     if (flag & SOR_ZERO_INITIAL_GUESS) {
1770       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1771       its--;
1772     }
1773     while (its--) {
1774       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1775       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1776 
1777       /* update rhs: bb1 = bb - B*x */
1778       PetscCall(VecScale(mat->lvec, -1.0));
1779       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1780 
1781       /* local sweep */
1782       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1783     }
1784   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1785 
1786   PetscCall(VecDestroy(&bb1));
1787 
1788   matin->factorerrortype = mat->A->factorerrortype;
1789   PetscFunctionReturn(PETSC_SUCCESS);
1790 }
1791 
1792 #if defined(PETSC_HAVE_CUDA)
1793 PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat, MatType, MatReuse, Mat *);
1794 #endif
1795 
1796 /*MC
1797    MATMPISELL - MATMPISELL = "MPISELL" - A matrix type to be used for parallel sparse matrices.
1798 
1799    Options Database Keys:
1800 . -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`
1801 
1802   Level: beginner
1803 
.seealso: `Mat`, `MATSELL`, `MATSEQSELL`, `MatCreateSELL()`
1805 M*/
/* Type constructor for MATMPISELL: installs the function table, creates the stash used
   to buffer off-process entries set with MatSetValues(), zeroes the lazily-built internal
   state, and composes the type-specific method implementations. */
PETSC_EXTERN PetscErrorCode MatCreate_MPISELL(Mat B)
{
  Mat_MPISELL *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  /* copy the shared static ops table (MatOps_Values is defined earlier in this file) */
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built on demand */
  b->garray      = NULL; /* local-to-global column map for the off-diagonal block */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* compose the type-specific implementations dispatched via PetscTryMethod/PetscUseMethod */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpiaij_C", MatConvert_MPISELL_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpisellcuda_C", MatConvert_MPISELL_MPISELLCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPISELL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISELL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1849