xref: /petsc/src/mat/impls/sell/mpi/mpisell.c (revision e91c04dfc8a52dee1965211bb1cc8e5bf775178f)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator,
12    and `MATMPISELL` otherwise.  As a result, for single process communicators,
13   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()`
19 
20   Level: beginner
21 
22 .seealso: `Mat`, `MATAIJ`, `MATBAIJ`, `MATSBAIJ`, `MatCreateSELL()`, `MatCreateSeqSELL()`, `MATSEQSELL`, `MATMPISELL`
23 M*/
24 
25 static PetscErrorCode MatDiagonalSet_MPISELL(Mat Y, Vec D, InsertMode is)
26 {
27   Mat_MPISELL *sell = (Mat_MPISELL *)Y->data;
28 
29   PetscFunctionBegin;
30   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
31     PetscCall(MatDiagonalSet(sell->A, D, is));
32   } else {
33     PetscCall(MatDiagonalSet_Default(Y, D, is));
34   }
35   PetscFunctionReturn(PETSC_SUCCESS);
36 }
37 
38 /*
39   Local utility routine that creates a mapping from the global column
40 number to the local number in the off-diagonal part of the local
41 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
42 a slightly higher hash table cost; without it it is not scalable (each processor
43 has an order N integer array but is fast to access.
44 */
45 PetscErrorCode MatCreateColmap_MPISELL_Private(Mat mat)
46 {
47   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
48   PetscInt     n    = sell->B->cmap->n, i;
49 
50   PetscFunctionBegin;
51   PetscCheck(sell->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPISELL Matrix was assembled but is missing garray");
52 #if defined(PETSC_USE_CTABLE)
53   PetscCall(PetscHMapICreateWithSize(n, &sell->colmap));
54   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(sell->colmap, sell->garray[i] + 1, i + 1));
55 #else
56   PetscCall(PetscCalloc1(mat->cmap->N + 1, &sell->colmap));
57   for (i = 0; i < n; i++) sell->colmap[sell->garray[i]] = i + 1;
58 #endif
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
/* Insert or add one (row,col,value) entry into the diagonal block A of the
   local SeqSELL storage.  Searches the existing entries of the row (binary
   search narrowed to a short linear scan, exploiting the cached search window
   low1/high1 that is reused across the monotone column sweep); on a miss it
   may reallocate the slice and shift later entries to make room.  orow/ocol
   are the original global indices, used only for error reporting.  Relies on
   cp1/vp1/nrow1/low1/high1/lastcol1 and a/am/nonew/ignorezeroentries being in
   scope at the expansion site. */
#define MatSetValues_SeqSELL_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (cp1[sliceheight * t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (cp1[sliceheight * _i] > col) break; \
      if (cp1[sliceheight * _i] == col) { \
        if (addv == ADD_VALUES) vp1[sliceheight * _i] += value; \
        else vp1[sliceheight * _i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    /* not found: optionally drop zeros / silently skip new nonzeros */ \
    if (value == 0.0 && ignorezeroentries) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXSELLReallocateSELL(A, am, 1, nrow1, a->sliidx, a->sliceheight, row / sliceheight, row, col, a->colidx, a->val, cp1, vp1, nonew, MatScalar); \
    /* shift up all the later entries in this row */ \
    for (ii = nrow1 - 1; ii >= _i; ii--) { \
      cp1[sliceheight * (ii + 1)] = cp1[sliceheight * ii]; \
      vp1[sliceheight * (ii + 1)] = vp1[sliceheight * ii]; \
    } \
    cp1[sliceheight * _i] = col; \
    vp1[sliceheight * _i] = value; \
    a->nz++; \
    nrow1++; \
  a_noinsert:; \
    a->rlen[row] = nrow1; \
  }
105 
/* Insert or add one (row,col,value) entry into the off-diagonal block B of
   the local SeqSELL storage.  Mirror image of MatSetValues_SeqSELL_A_Private
   above, operating on cp2/vp2/nrow2/low2/high2/lastcol2 and the b/bm block
   variables instead.  orow/ocol are the original global indices, used only
   for error reporting. */
#define MatSetValues_SeqSELL_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (cp2[sliceheight * t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (cp2[sliceheight * _i] > col) break; \
      if (cp2[sliceheight * _i] == col) { \
        if (addv == ADD_VALUES) vp2[sliceheight * _i] += value; \
        else vp2[sliceheight * _i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    /* not found: optionally drop zeros / silently skip new nonzeros */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXSELLReallocateSELL(B, bm, 1, nrow2, b->sliidx, b->sliceheight, row / sliceheight, row, col, b->colidx, b->val, cp2, vp2, nonew, MatScalar); \
    /* shift up all the later entries in this row */ \
    for (ii = nrow2 - 1; ii >= _i; ii--) { \
      cp2[sliceheight * (ii + 1)] = cp2[sliceheight * ii]; \
      vp2[sliceheight * (ii + 1)] = vp2[sliceheight * ii]; \
    } \
    cp2[sliceheight * _i] = col; \
    vp2[sliceheight * _i] = value; \
    b->nz++; \
    nrow2++; \
  b_noinsert:; \
    b->rlen[row] = nrow2; \
  }
149 
/* Insert/add an m x n logically-dense block of values v into the parallel
   SELL matrix.  Locally-owned rows are routed to the diagonal (A) or
   off-diagonal (B) sequential block; off-process rows are stashed for
   communication during assembly.  Negative row/column indices are skipped. */
static PetscErrorCode MatSetValues_MPISELL(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscScalar  value;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, shift1, shift2;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool    roworiented = sell->roworiented;

  /* Some Variables required in the macro */
  Mat          A                 = sell->A;
  Mat_SeqSELL *a                 = (Mat_SeqSELL *)A->data;
  PetscBool    ignorezeroentries = a->ignorezeroentries, found;
  Mat          B                 = sell->B;
  Mat_SeqSELL *b                 = (Mat_SeqSELL *)B->data;
  PetscInt    *cp1, *cp2, ii, _i, nrow1, nrow2, low1, high1, low2, high2, t, lastcol1, lastcol2, sliceheight = a->sliceheight;
  MatScalar   *vp1, *vp2;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by MatSetValue_SeqSELL_Private for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      shift1   = a->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
      cp1      = PetscSafePointerPlusOffset(a->colidx, shift1);
      vp1      = PetscSafePointerPlusOffset(a->val, shift1);
      nrow1    = a->rlen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      shift2   = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
      cp2      = PetscSafePointerPlusOffset(b->colidx, shift2);
      vp2      = PetscSafePointerPlusOffset(b->val, shift2);
      nrow2    = b->rlen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block: use local column index */
          col = in[j] - cstart;
          MatSetValue_SeqSELL_Private(A, row, col, value, addv, im[i], in[j], cp1, vp1, lastcol1, low1, high1); /* set one value */
#if defined(PETSC_HAVE_CUDA)
          /* 'found' is set by the insertion macro; a CPU-side modification invalidates the device copy */
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate via colmap */
            if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(sell->colmap, in[j] + 1, 0, &col));
            col--;
#else
            col = sell->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqSELL *)sell->B->data)->nonew) {
              /* new off-diagonal nonzero location: revert B to global column numbering */
              PetscCall(MatDisAssemble_MPISELL(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSELL_B_Private() */
              B      = sell->B;
              b      = (Mat_SeqSELL *)B->data;
              shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
              cp2    = b->colidx + shift2;
              vp2    = b->val + shift2;
              nrow2  = b->rlen[row];
              low2   = 0;
              high2  = nrow2;
              found  = PETSC_FALSE;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          MatSetValue_SeqSELL_Private(B, row, col, value, addv, im[i], in[j], cp2, vp2, lastcol2, low2, high2); /* set one value */
#if defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash the values for the scatter performed in MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!sell->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
247 
/* Retrieve an m x n block of values into v (row-major).  Only rows owned by
   this process are supported; columns outside the diagonal block are looked
   up through colmap, and entries absent from the off-diagonal block are
   returned as 0.0. */
static PetscErrorCode MatGetValues_MPISELL(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* diagonal block: translate to local column number */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(sell->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: map the global column through colmap (stored shifted by one) */
          if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(sell->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = sell->colmap[idxn[j]] - 1;
#endif
          /* garray check guards against stale colmap entries after disassembly */
          if ((col < 0) || (sell->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(sell->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
282 
283 static PetscErrorCode MatAssemblyBegin_MPISELL(Mat mat, MatAssemblyType mode)
284 {
285   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
286   PetscInt     nstash, reallocs;
287 
288   PetscFunctionBegin;
289   if (sell->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
290 
291   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
292   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
293   PetscCall(PetscInfo(sell->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
/* Complete assembly: drain the off-process stash into local values, assemble
   the diagonal block, coordinate (collectively) any needed disassembly of the
   off-diagonal block, build the multiply machinery on first final assembly,
   then assemble the off-diagonal block and synchronize the nonzero state. */
PetscErrorCode MatAssemblyEnd_MPISELL(Mat mat, MatAssemblyType mode)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, flg;
  PetscInt    *row, *col;
  PetscScalar *val;
  PetscBool    other_disassembled;
  /* do not use 'b = (Mat_SeqSELL*)sell->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!sell->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other processes and insert them locally */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n; i++) { /* assemble one by one */
        PetscCall(MatSetValues_MPISELL(mat, 1, row + i, 1, col + i, val + i, mat->insertmode));
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) sell->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(sell->A, mode));
  PetscCall(MatAssemblyEnd(sell->A, mode));

  /*
     determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqSELL *)sell->B->data)->nonew) {
    /* MPI_LAND of was_assembled: false on any rank means someone disassembled */
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISELL(mat));
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISELL(mat));
#if defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && sell->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) sell->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(sell->B, mode));
  PetscCall(MatAssemblyEnd(sell->B, mode));
  /* cached row-access workspace and diagonal are stale after assembly */
  PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
  sell->rowvalues = NULL;
  PetscCall(VecDestroy(&sell->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSELL *)sell->A->data)->nonew) {
    PetscObjectState state = sell->A->nonzerostate + sell->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
356 
357 static PetscErrorCode MatZeroEntries_MPISELL(Mat A)
358 {
359   Mat_MPISELL *l = (Mat_MPISELL *)A->data;
360 
361   PetscFunctionBegin;
362   PetscCall(MatZeroEntries(l->A));
363   PetscCall(MatZeroEntries(l->B));
364   PetscFunctionReturn(PETSC_SUCCESS);
365 }
366 
367 static PetscErrorCode MatMult_MPISELL(Mat A, Vec xx, Vec yy)
368 {
369   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
370   PetscInt     nt;
371 
372   PetscFunctionBegin;
373   PetscCall(VecGetLocalSize(xx, &nt));
374   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
375   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
376   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
377   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
378   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 static PetscErrorCode MatMultDiagonalBlock_MPISELL(Mat A, Vec bb, Vec xx)
383 {
384   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
385 
386   PetscFunctionBegin;
387   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode MatMultAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
392 {
393   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
394 
395   PetscFunctionBegin;
396   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
397   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
398   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
399   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 static PetscErrorCode MatMultTranspose_MPISELL(Mat A, Vec xx, Vec yy)
404 {
405   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
406 
407   PetscFunctionBegin;
408   /* do nondiagonal part */
409   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
410   /* do local part */
411   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
412   /* add partial results together */
413   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
414   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
415   PetscFunctionReturn(PETSC_SUCCESS);
416 }
417 
/* Test whether Bmat equals Amat^T (within tol).  First compares the diagonal
   blocks (cheap); if those match and more than one process is involved, the
   off-diagonal parts are compared via extracted submatrices. */
static PetscErrorCode MatIsTranspose_MPISELL(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm     comm;
  Mat_MPISELL *Asell = (Mat_MPISELL *)Amat->data, *Bsell;
  Mat          Adia  = Asell->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS           Me, Notme;
  PetscInt     M, N, first, last, *notme, i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bsell = (Mat_MPISELL *)Bmat->data;
  Bdia  = Bsell->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, f));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside the local ownership range [first,last)
     (NOTE(review): allocation uses N while the second fill loop runs to M —
     these agree only for square matrices, which the transpose test implies) */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* compare A(Me,Notme) against B(Notme,Me) */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
457 
458 static PetscErrorCode MatMultTransposeAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
459 {
460   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
461 
462   PetscFunctionBegin;
463   /* do nondiagonal part */
464   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
465   /* do local part */
466   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
467   /* add partial results together */
468   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
469   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
470   PetscFunctionReturn(PETSC_SUCCESS);
471 }
472 
473 /*
474   This only works correctly for square matrices where the subblock A->A is the
475    diagonal block
476 */
477 static PetscErrorCode MatGetDiagonal_MPISELL(Mat A, Vec v)
478 {
479   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
480 
481   PetscFunctionBegin;
482   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
483   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
484   PetscCall(MatGetDiagonal(a->A, v));
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
488 static PetscErrorCode MatScale_MPISELL(Mat A, PetscScalar aa)
489 {
490   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
491 
492   PetscFunctionBegin;
493   PetscCall(MatScale(a->A, aa));
494   PetscCall(MatScale(a->B, aa));
495   PetscFunctionReturn(PETSC_SUCCESS);
496 }
497 
/* Destroy the parallel SELL matrix: release the stash, both local blocks,
   the column map, communication machinery and auxiliary arrays, then clear
   the type name and all composed methods. */
PetscErrorCode MatDestroy_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&sell->diag));
  PetscCall(MatDestroy(&sell->A));
  PetscCall(MatDestroy(&sell->B));
  /* colmap storage differs by build configuration (hash table vs flat array) */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&sell->colmap));
#else
  PetscCall(PetscFree(sell->colmap));
#endif
  PetscCall(PetscFree(sell->garray));
  PetscCall(VecDestroy(&sell->lvec));
  PetscCall(VecScatterDestroy(&sell->Mvctx));
  PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
  PetscCall(PetscFree(sell->ld));
  PetscCall(PetscFree(mat->data));

  /* detach type-specific methods composed on the object */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISELLSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpisellcuda_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
532 
533 #include <petscdraw.h>
/* View the parallel SELL matrix through an ASCII, draw, binary or socket
   viewer.  Info formats are handled per-process without gathering data; all
   other formats assemble a copy of the entire matrix on process 0 and view
   that sequentially. */
static PetscErrorCode MatView_MPISELL_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISELL      *sell = (Mat_MPISELL *)mat->data;
  PetscMPIInt       rank = sell->rank, size = sell->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process statistics: local rows, nonzeros, memory, I-node usage */
      MatInfo   info;
      PetscInt *inodes;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(sell->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
                                                     (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
                                                     (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      }
      PetscCall(MatGetInfo(sell->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(sell->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(sell->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(sell->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)sell->A, ((PetscObject)mat)->name));
      PetscCall(MatView(sell->A, viewer));
    } else {
      /* PetscCall(MatView_MPISELL_Binary(mat,viewer)); */
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSELL *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *acolidx, row, col, i, j;
    MatScalar   *aval;
    PetscBool    isnonzero;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* This is just a temporary matrix, so explicitly using MATMPISELL is probably best */
    PetscCall(MatSetType(A, MATMPISELL));
    PetscCall(MatMPISELLSetPreallocation(A, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc    = (Mat_SeqSELL *)sell->A->data;
    acolidx = Aloc->colidx;
    aval    = Aloc->val;
    for (i = 0; i < Aloc->totalslices; i++) { /* loop over slices */
      for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
        /* padding slots beyond a row's length (rlen) are skipped */
        isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
        if (isnonzero) { /* check the mask bit */
          row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
          col = *acolidx + mat->rmap->rstart;
          PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
        }
        aval++;
        acolidx++;
      }
    }

    /* copy over the B part */
    Aloc    = (Mat_SeqSELL *)sell->B->data;
    acolidx = Aloc->colidx;
    aval    = Aloc->val;
    for (i = 0; i < Aloc->totalslices; i++) {
      for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
        isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
        if (isnonzero) {
          row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
          /* B stores compacted local columns; garray maps them back to global numbering */
          col = sell->garray[*acolidx];
          PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
        }
        aval++;
        acolidx++;
      }
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISELL *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqSELL(((Mat_MPISELL *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
669 
670 static PetscErrorCode MatView_MPISELL(Mat mat, PetscViewer viewer)
671 {
672   PetscBool iascii, isdraw, issocket, isbinary;
673 
674   PetscFunctionBegin;
675   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
676   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
677   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
678   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
679   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPISELL_ASCIIorDraworSocket(mat, viewer));
680   PetscFunctionReturn(PETSC_SUCCESS);
681 }
682 
683 static PetscErrorCode MatGetGhosts_MPISELL(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
684 {
685   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
686 
687   PetscFunctionBegin;
688   PetscCall(MatGetSize(sell->B, NULL, nghosts));
689   if (ghosts) *ghosts = sell->garray;
690   PetscFunctionReturn(PETSC_SUCCESS);
691 }
692 
693 static PetscErrorCode MatGetInfo_MPISELL(Mat matin, MatInfoType flag, MatInfo *info)
694 {
695   Mat_MPISELL   *mat = (Mat_MPISELL *)matin->data;
696   Mat            A = mat->A, B = mat->B;
697   PetscLogDouble isend[5], irecv[5];
698 
699   PetscFunctionBegin;
700   info->block_size = 1.0;
701   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
702 
703   isend[0] = info->nz_used;
704   isend[1] = info->nz_allocated;
705   isend[2] = info->nz_unneeded;
706   isend[3] = info->memory;
707   isend[4] = info->mallocs;
708 
709   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
710 
711   isend[0] += info->nz_used;
712   isend[1] += info->nz_allocated;
713   isend[2] += info->nz_unneeded;
714   isend[3] += info->memory;
715   isend[4] += info->mallocs;
716   if (flag == MAT_LOCAL) {
717     info->nz_used      = isend[0];
718     info->nz_allocated = isend[1];
719     info->nz_unneeded  = isend[2];
720     info->memory       = isend[3];
721     info->mallocs      = isend[4];
722   } else if (flag == MAT_GLOBAL_MAX) {
723     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
724 
725     info->nz_used      = irecv[0];
726     info->nz_allocated = irecv[1];
727     info->nz_unneeded  = irecv[2];
728     info->memory       = irecv[3];
729     info->mallocs      = irecv[4];
730   } else if (flag == MAT_GLOBAL_SUM) {
731     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
732 
733     info->nz_used      = irecv[0];
734     info->nz_allocated = irecv[1];
735     info->nz_unneeded  = irecv[2];
736     info->memory       = irecv[3];
737     info->mallocs      = irecv[4];
738   }
739   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
740   info->fill_ratio_needed = 0;
741   info->factor_mallocs    = 0;
742   PetscFunctionReturn(PETSC_SUCCESS);
743 }
744 
745 static PetscErrorCode MatSetOption_MPISELL(Mat A, MatOption op, PetscBool flg)
746 {
747   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
748 
749   PetscFunctionBegin;
750   switch (op) {
751   case MAT_NEW_NONZERO_LOCATIONS:
752   case MAT_NEW_NONZERO_ALLOCATION_ERR:
753   case MAT_UNUSED_NONZERO_LOCATION_ERR:
754   case MAT_KEEP_NONZERO_PATTERN:
755   case MAT_NEW_NONZERO_LOCATION_ERR:
756   case MAT_USE_INODES:
757   case MAT_IGNORE_ZERO_ENTRIES:
758     MatCheckPreallocated(A, 1);
759     PetscCall(MatSetOption(a->A, op, flg));
760     PetscCall(MatSetOption(a->B, op, flg));
761     break;
762   case MAT_ROW_ORIENTED:
763     MatCheckPreallocated(A, 1);
764     a->roworiented = flg;
765 
766     PetscCall(MatSetOption(a->A, op, flg));
767     PetscCall(MatSetOption(a->B, op, flg));
768     break;
769   case MAT_FORCE_DIAGONAL_ENTRIES:
770   case MAT_SORTED_FULL:
771     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
772     break;
773   case MAT_IGNORE_OFF_PROC_ENTRIES:
774     a->donotstash = flg;
775     break;
776   case MAT_SPD:
777   case MAT_SPD_ETERNAL:
778     break;
779   case MAT_SYMMETRIC:
780     MatCheckPreallocated(A, 1);
781     PetscCall(MatSetOption(a->A, op, flg));
782     break;
783   case MAT_STRUCTURALLY_SYMMETRIC:
784     MatCheckPreallocated(A, 1);
785     PetscCall(MatSetOption(a->A, op, flg));
786     break;
787   case MAT_HERMITIAN:
788     MatCheckPreallocated(A, 1);
789     PetscCall(MatSetOption(a->A, op, flg));
790     break;
791   case MAT_SYMMETRY_ETERNAL:
792     MatCheckPreallocated(A, 1);
793     PetscCall(MatSetOption(a->A, op, flg));
794     break;
795   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
796     MatCheckPreallocated(A, 1);
797     PetscCall(MatSetOption(a->A, op, flg));
798     break;
799   default:
800     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
801   }
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
/* Computes mat = diag(ll) * mat * diag(rr); either vector may be NULL.
   The right-scaling of the off-diagonal block needs ghosted values of rr,
   so the scatter is started first and completed only after the work that
   does not depend on it. */
static PetscErrorCode MatDiagonalScale_MPISELL(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  Mat          a = sell->A, b = sell->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  /* s2/s3: local row/column counts of the parallel matrix. */
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* Left-scale the off-diagonal block; rows are local so no communication needed. */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, sell->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
834 
835 static PetscErrorCode MatSetUnfactored_MPISELL(Mat A)
836 {
837   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
838 
839   PetscFunctionBegin;
840   PetscCall(MatSetUnfactored(a->A));
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatEqual_MPISELL(Mat A, Mat B, PetscBool *flag)
845 {
846   Mat_MPISELL *matB = (Mat_MPISELL *)B->data, *matA = (Mat_MPISELL *)A->data;
847   Mat          a, b, c, d;
848   PetscBool    flg;
849 
850   PetscFunctionBegin;
851   a = matA->A;
852   b = matA->B;
853   c = matB->A;
854   d = matB->B;
855 
856   PetscCall(MatEqual(a, c, &flg));
857   if (flg) PetscCall(MatEqual(b, d, &flg));
858   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 static PetscErrorCode MatCopy_MPISELL(Mat A, Mat B, MatStructure str)
863 {
864   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
865   Mat_MPISELL *b = (Mat_MPISELL *)B->data;
866 
867   PetscFunctionBegin;
868   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
869   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
870     /* because of the column compression in the off-processor part of the matrix a->B,
871        the number of columns in a->B and b->B may be different, hence we cannot call
872        the MatCopy() directly on the two parts. If need be, we can provide a more
873        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
874        then copying the submatrices */
875     PetscCall(MatCopy_Basic(A, B, str));
876   } else {
877     PetscCall(MatCopy(a->A, b->A, str));
878     PetscCall(MatCopy(a->B, b->B, str));
879   }
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
/* Default MatSetUp(): preallocate both blocks with PETSC_DEFAULT row lengths. */
static PetscErrorCode MatSetUp_MPISELL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPISELLSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
889 
890 static PetscErrorCode MatConjugate_MPISELL(Mat mat)
891 {
892   PetscFunctionBegin;
893   if (PetscDefined(USE_COMPLEX)) {
894     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
895 
896     PetscCall(MatConjugate_SeqSELL(sell->A));
897     PetscCall(MatConjugate_SeqSELL(sell->B));
898   }
899   PetscFunctionReturn(PETSC_SUCCESS);
900 }
901 
902 static PetscErrorCode MatRealPart_MPISELL(Mat A)
903 {
904   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
905 
906   PetscFunctionBegin;
907   PetscCall(MatRealPart(a->A));
908   PetscCall(MatRealPart(a->B));
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatImaginaryPart_MPISELL(Mat A)
913 {
914   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
915 
916   PetscFunctionBegin;
917   PetscCall(MatImaginaryPart(a->A));
918   PetscCall(MatImaginaryPart(a->B));
919   PetscFunctionReturn(PETSC_SUCCESS);
920 }
921 
922 static PetscErrorCode MatInvertBlockDiagonal_MPISELL(Mat A, const PetscScalar **values)
923 {
924   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
925 
926   PetscFunctionBegin;
927   PetscCall(MatInvertBlockDiagonal(a->A, values));
928   A->factorerrortype = a->A->factorerrortype;
929   PetscFunctionReturn(PETSC_SUCCESS);
930 }
931 
932 static PetscErrorCode MatSetRandom_MPISELL(Mat x, PetscRandom rctx)
933 {
934   Mat_MPISELL *sell = (Mat_MPISELL *)x->data;
935 
936   PetscFunctionBegin;
937   PetscCall(MatSetRandom(sell->A, rctx));
938   PetscCall(MatSetRandom(sell->B, rctx));
939   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
940   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
941   PetscFunctionReturn(PETSC_SUCCESS);
942 }
943 
/* Options handler for MATMPISELL; currently no type-specific options are
   registered, the Head/End pair is kept as a placeholder for future ones. */
static PetscErrorCode MatSetFromOptions_MPISELL(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPISELL options");
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
951 
952 static PetscErrorCode MatShift_MPISELL(Mat Y, PetscScalar a)
953 {
954   Mat_MPISELL *msell = (Mat_MPISELL *)Y->data;
955   Mat_SeqSELL *sell  = (Mat_SeqSELL *)msell->A->data;
956 
957   PetscFunctionBegin;
958   if (!Y->preallocated) {
959     PetscCall(MatMPISELLSetPreallocation(Y, 1, NULL, 0, NULL));
960   } else if (!sell->nz) {
961     PetscInt nonew = sell->nonew;
962     PetscCall(MatSeqSELLSetPreallocation(msell->A, 1, NULL));
963     sell->nonew = nonew;
964   }
965   PetscCall(MatShift_Basic(Y, a));
966   PetscFunctionReturn(PETSC_SUCCESS);
967 }
968 
969 static PetscErrorCode MatMissingDiagonal_MPISELL(Mat A, PetscBool *missing, PetscInt *d)
970 {
971   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
972 
973   PetscFunctionBegin;
974   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
975   PetscCall(MatMissingDiagonal(a->A, missing, d));
976   if (d) {
977     PetscInt rstart;
978     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
979     *d += rstart;
980   }
981   PetscFunctionReturn(PETSC_SUCCESS);
982 }
983 
984 static PetscErrorCode MatGetDiagonalBlock_MPISELL(Mat A, Mat *a)
985 {
986   PetscFunctionBegin;
987   *a = ((Mat_MPISELL *)A->data)->A;
988   PetscFunctionReturn(PETSC_SUCCESS);
989 }
990 
991 static PetscErrorCode MatStoreValues_MPISELL(Mat mat)
992 {
993   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
994 
995   PetscFunctionBegin;
996   PetscCall(MatStoreValues(sell->A));
997   PetscCall(MatStoreValues(sell->B));
998   PetscFunctionReturn(PETSC_SUCCESS);
999 }
1000 
1001 static PetscErrorCode MatRetrieveValues_MPISELL(Mat mat)
1002 {
1003   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
1004 
1005   PetscFunctionBegin;
1006   PetscCall(MatRetrieveValues(sell->A));
1007   PetscCall(MatRetrieveValues(sell->B));
1008   PetscFunctionReturn(PETSC_SUCCESS);
1009 }
1010 
/* Type-specific implementation behind MatMPISELLSetPreallocation(): creates
   (on first call) the two sequential blocks and preallocates them. */
static PetscErrorCode MatMPISELLSetPreallocation_MPISELL(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *b;

  PetscFunctionBegin;
  /* Layouts must be finalized before block sizes can be derived from them. */
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPISELL *)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQSELL matrices. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQSELL));
    /* The off-diagonal block is local-rows x global-columns; its columns are
       compressed later during assembly. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
    PetscCall(MatSetType(b->B, MATSEQSELL));
  }

  PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  /*
    critical for MatAssemblyEnd to work.
    MatAssemblyBegin checks it to set up was_assembled
    and MatAssemblyEnd checks was_assembled to determine whether to build garray
  */
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1044 
/* Duplicates a MATMPISELL matrix: creates a new parallel matrix of the same
   type/sizes, then deep-copies the auxiliary state (colmap, garray, lvec,
   scatter context) and the two sequential blocks. cpvalues controls whether
   numerical values are copied along with the structure. */
static PetscErrorCode MatDuplicate_MPISELL(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPISELL *a, *oldmat = (Mat_MPISELL *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPISELL *)mat->data;

  /* The duplicate is immediately usable: mark it assembled and preallocated. */
  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* Per-call MatGetRow() scratch state is not copied. */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* Copy the global-to-local column map; its representation depends on
     whether PETSc was configured with hash tables (scalable) or a plain
     array of length N (fast but not scalable). */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* garray maps compressed off-diagonal columns to global column indices. */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1097 
/* Positional dispatch table for MATMPISELL. The slot order is fixed by
   struct _MatOps; the numeric comments track slot indices. NULL entries are
   operations this type does not implement. Do not reorder. */
static const struct _MatOps MatOps_Values = {MatSetValues_MPISELL,
                                             NULL,
                                             NULL,
                                             MatMult_MPISELL,
                                             /* 4*/ MatMultAdd_MPISELL,
                                             MatMultTranspose_MPISELL,
                                             MatMultTransposeAdd_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*10*/ NULL,
                                             NULL,
                                             NULL,
                                             MatSOR_MPISELL,
                                             NULL,
                                             /*15*/ MatGetInfo_MPISELL,
                                             MatEqual_MPISELL,
                                             MatGetDiagonal_MPISELL,
                                             MatDiagonalScale_MPISELL,
                                             NULL,
                                             /*20*/ MatAssemblyBegin_MPISELL,
                                             MatAssemblyEnd_MPISELL,
                                             MatSetOption_MPISELL,
                                             MatZeroEntries_MPISELL,
                                             /*24*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*29*/ MatSetUp_MPISELL,
                                             NULL,
                                             NULL,
                                             MatGetDiagonalBlock_MPISELL,
                                             NULL,
                                             /*34*/ MatDuplicate_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*39*/ NULL,
                                             NULL,
                                             NULL,
                                             MatGetValues_MPISELL,
                                             MatCopy_MPISELL,
                                             /*44*/ NULL,
                                             MatScale_MPISELL,
                                             MatShift_MPISELL,
                                             MatDiagonalSet_MPISELL,
                                             NULL,
                                             /*49*/ MatSetRandom_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*54*/ MatFDColoringCreate_MPIXAIJ,
                                             NULL,
                                             MatSetUnfactored_MPISELL,
                                             NULL,
                                             NULL,
                                             /*59*/ NULL,
                                             MatDestroy_MPISELL,
                                             MatView_MPISELL,
                                             NULL,
                                             NULL,
                                             /*64*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*69*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*75*/ MatFDColoringApply_AIJ, /* reuse AIJ function */
                                             MatSetFromOptions_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*80*/ NULL,
                                             NULL,
                                             NULL,
                                             /*83*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*89*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*94*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*99*/ NULL,
                                             NULL,
                                             NULL,
                                             MatConjugate_MPISELL,
                                             NULL,
                                             /*104*/ NULL,
                                             MatRealPart_MPISELL,
                                             MatImaginaryPart_MPISELL,
                                             NULL,
                                             NULL,
                                             /*109*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             MatMissingDiagonal_MPISELL,
                                             /*114*/ NULL,
                                             NULL,
                                             MatGetGhosts_MPISELL,
                                             NULL,
                                             NULL,
                                             /*119*/ MatMultDiagonalBlock_MPISELL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*124*/ NULL,
                                             NULL,
                                             MatInvertBlockDiagonal_MPISELL,
                                             NULL,
                                             NULL,
                                             /*129*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*134*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*139*/ NULL,
                                             NULL,
                                             NULL,
                                             MatFDColoringSetUp_MPIXAIJ,
                                             NULL,
                                             /*144*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*150*/ NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             NULL,
                                             /*155*/ NULL,
                                             NULL};
1255 
1256 /*@C
1257   MatMPISELLSetPreallocation - Preallocates memory for a `MATMPISELL` sparse parallel matrix in sell format.
1258   For good matrix assembly performance the user should preallocate the matrix storage by
1259   setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
1260 
1261   Collective
1262 
1263   Input Parameters:
1264 + B     - the matrix
1265 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1266            (same value is used for all local rows)
1267 . d_nnz - array containing the number of nonzeros in the various rows of the
1268            DIAGONAL portion of the local submatrix (possibly different for each row)
1269            or NULL (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
1270            The size of this array is equal to the number of local rows, i.e 'm'.
1271            For matrices that will be factored, you must leave room for (and set)
1272            the diagonal entry even if it is zero.
1273 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1274            submatrix (same value is used for all local rows).
1275 - o_nnz - array containing the number of nonzeros in the various rows of the
1276            OFF-DIAGONAL portion of the local submatrix (possibly different for
1277            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
1278            structure. The size of this array is equal to the number
1279            of local rows, i.e 'm'.
1280 
1281   Example usage:
1282   Consider the following 8x8 matrix with 34 non-zero values, that is
1283   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1284   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1285   as follows
1286 
1287 .vb
1288             1  2  0  |  0  3  0  |  0  4
1289     Proc0   0  5  6  |  7  0  0  |  8  0
1290             9  0 10  | 11  0  0  | 12  0
1291     -------------------------------------
1292            13  0 14  | 15 16 17  |  0  0
1293     Proc1   0 18  0  | 19 20 21  |  0  0
1294             0  0  0  | 22 23  0  | 24  0
1295     -------------------------------------
1296     Proc2  25 26 27  |  0  0 28  | 29  0
1297            30  0  0  | 31 32 33  |  0 34
1298 .ve
1299 
1300   This can be represented as a collection of submatrices as
1301 
1302 .vb
1303       A B C
1304       D E F
1305       G H I
1306 .ve
1307 
1308   Where the submatrices A,B,C are owned by proc0, D,E,F are
1309   owned by proc1, G,H,I are owned by proc2.
1310 
1311   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1312   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1313   The 'M','N' parameters are 8,8, and have the same values on all procs.
1314 
1315   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1316   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1317   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1318   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1319   part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1320   matrix, and [DF] as another SeqSELL matrix.
1321 
1322   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
1323   allocated for every row of the local DIAGONAL submatrix, and o_nz
1324   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1325   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
1326   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1327   In this case, the values of d_nz,o_nz are
1328 .vb
1329      proc0  dnz = 2, o_nz = 2
1330      proc1  dnz = 3, o_nz = 2
1331      proc2  dnz = 1, o_nz = 4
1332 .ve
1333   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1334   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
1336   34 values.
1337 
1338   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
1339   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1340   In the above case the values for d_nnz,o_nnz are
1341 .vb
1342      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
1343      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
1344      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
1345 .ve
1346   Here the space allocated is according to nz (or maximum values in the nnz
1347   if nnz is provided) for DIAGONAL and OFF-DIAGONAL submatrices, i.e (2+2+3+2)*3+(1+4)*2=37
1348 
1349   Level: intermediate
1350 
1351   Notes:
1352   If the *_nnz parameter is given then the *_nz parameter is ignored
1353 
1354   The stored row and column indices begin with zero.
1355 
1356   The parallel matrix is partitioned such that the first m0 rows belong to
1357   process 0, the next m1 rows belong to process 1, the next m2 rows belong
1358   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1359 
1360   The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
1362   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
1363   first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
1365   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
1366   common case of a square matrix, the row and column ranges are the same and
1367   the DIAGONAL part is also square. The remaining portion of the local
1368   submatrix (mxN) constitute the OFF-DIAGONAL portion.
1369 
1370   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
1371 
1372   You can call `MatGetInfo()` to get information on how effective the preallocation was;
1373   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1374   You can also run with the option -info and look for messages with the string
1375   malloc in them to see if additional memory allocation was needed.
1376 
1377 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatCreateSELL()`,
1378           `MATMPISELL`, `MatGetInfo()`, `PetscSplitOwnership()`, `MATSELL`
1379 @*/
PetscErrorCode MatMPISELLSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Delegate to the type-specific implementation when the matrix registers
     one; PetscTryMethod makes this a silent no-op for other matrix types. */
  PetscTryMethod(B, "MatMPISELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1388 
1389 /*MC
1390    MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for MPI sparse matrices,
1391    based on the sliced Ellpack format
1392 
1393    Options Database Key:
. -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`
1395 
1396    Level: beginner
1397 
1398 .seealso: `Mat`, `MatCreateSELL()`, `MATSEQSELL`, `MATSELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ`
1399 M*/
1400 
1401 /*@C
1402   MatCreateSELL - Creates a sparse parallel matrix in `MATSELL` format.
1403 
1404   Collective
1405 
1406   Input Parameters:
1407 + comm      - MPI communicator
1408 . m         - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
1409               This value should be the same as the local size used in creating the
1410               y vector for the matrix-vector product y = Ax.
1411 . n         - This value should be the same as the local size used in creating the
1412               x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
1413               calculated if `N` is given) For square matrices n is almost always `m`.
1414 . M         - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1415 . N         - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1416 . d_rlenmax - max number of nonzeros per row in DIAGONAL portion of local submatrix
1417              (same value is used for all local rows)
1418 . d_rlen    - array containing the number of nonzeros in the various rows of the
1419               DIAGONAL portion of the local submatrix (possibly different for each row)
1420               or `NULL`, if d_rlenmax is used to specify the nonzero structure.
1421               The size of this array is equal to the number of local rows, i.e `m`.
1422 . o_rlenmax - max number of nonzeros per row in the OFF-DIAGONAL portion of local
1423               submatrix (same value is used for all local rows).
1424 - o_rlen    - array containing the number of nonzeros in the various rows of the
1425               OFF-DIAGONAL portion of the local submatrix (possibly different for
1426               each row) or `NULL`, if `o_rlenmax` is used to specify the nonzero
1427               structure. The size of this array is equal to the number
1428               of local rows, i.e `m`.
1429 
1430   Output Parameter:
1431 . A - the matrix
1432 
1433   Options Database Key:
1434 . -mat_sell_oneindex - Internally use indexing starting at 1
1435         rather than 0.  When calling `MatSetValues()`,
1436         the user still MUST index entries starting at 0!
1437 
1438   Example:
1439   Consider the following 8x8 matrix with 34 non-zero values, that is
1440   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1441   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1442   as follows
1443 
1444 .vb
1445             1  2  0  |  0  3  0  |  0  4
1446     Proc0   0  5  6  |  7  0  0  |  8  0
1447             9  0 10  | 11  0  0  | 12  0
1448     -------------------------------------
1449            13  0 14  | 15 16 17  |  0  0
1450     Proc1   0 18  0  | 19 20 21  |  0  0
1451             0  0  0  | 22 23  0  | 24  0
1452     -------------------------------------
1453     Proc2  25 26 27  |  0  0 28  | 29  0
1454            30  0  0  | 31 32 33  |  0 34
1455 .ve
1456 
1457   This can be represented as a collection of submatrices as
1458 .vb
1459       A B C
1460       D E F
1461       G H I
1462 .ve
1463 
1464   Where the submatrices A,B,C are owned by proc0, D,E,F are
1465   owned by proc1, G,H,I are owned by proc2.
1466 
1467   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1468   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1469   The 'M','N' parameters are 8,8, and have the same values on all procs.
1470 
1471   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1472   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1473   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1474   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1475   part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
1476   matrix, and [DF] as another `MATSEQSELL` matrix.
1477 
1478   When d_rlenmax, o_rlenmax parameters are specified, d_rlenmax storage elements are
1479   allocated for every row of the local DIAGONAL submatrix, and o_rlenmax
1480   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1481   One way to choose `d_rlenmax` and `o_rlenmax` is to use the maximum number of nonzeros over
1482   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1483   In this case, the values of d_rlenmax,o_rlenmax are
1484 .vb
1485      proc0 - d_rlenmax = 2, o_rlenmax = 2
1486      proc1 - d_rlenmax = 3, o_rlenmax = 2
1487      proc2 - d_rlenmax = 1, o_rlenmax = 4
1488 .ve
1489   We are allocating m*(d_rlenmax+o_rlenmax) storage locations for every proc. This
1490   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e. we are using 12+15+10=37 storage locations to store
1492   34 values.
1493 
1494   When `d_rlen`, `o_rlen` parameters are specified, the storage is specified
1495   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_rlen`, `o_rlen` are
.vb
     proc0 - d_rlen = [2,2,2] and o_rlen = [2,2,2]
     proc1 - d_rlen = [3,3,2] and o_rlen = [2,1,1]
     proc2 - d_rlen = [1,1]   and o_rlen = [4,4]
1501 .ve
1502   Here the space allocated is still 37 though there are 34 nonzeros because
1503   the allocation is always done according to rlenmax.
1504 
1505   Level: intermediate
1506 
1507   Notes:
1508   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1509   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1510   [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`]
1511 
1512   If the *_rlen parameter is given then the *_rlenmax parameter is ignored
1513 
1514   `m`, `n`, `M`, `N` parameters specify the size of the matrix, and its partitioning across
1515   processors, while `d_rlenmax`, `d_rlen`, `o_rlenmax` , `o_rlen` parameters specify the approximate
1516   storage requirements for this matrix.
1517 
1518   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
1519   processor than it must be used on all processors that share the object for
1520   that argument.
1521 
1522   The user MUST specify either the local or global matrix dimensions
1523   (possibly both).
1524 
1525   The parallel matrix is partitioned across processors such that the
1526   first m0 rows belong to process 0, the next m1 rows belong to
1527   process 1, the next m2 rows belong to process 2 etc.. where
1528   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
1529   values corresponding to [`m` x `N`] submatrix.
1530 
1531   The columns are logically partitioned with the n0 columns belonging
1532   to 0th partition, the next n1 columns belonging to the next
1533   partition etc.. where n0,n1,n2... are the input parameter `n`.
1534 
1535   The DIAGONAL portion of the local submatrix on any given processor
1536   is the submatrix corresponding to the rows and columns `m`, `n`
1537   corresponding to the given processor. i.e diagonal matrix on
1538   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
1539   etc. The remaining portion of the local submatrix [m x (N-n)]
1540   constitute the OFF-DIAGONAL portion. The example below better
1541   illustrates this concept.
1542 
1543   For a square global matrix we define each processor's diagonal portion
1544   to be its local rows and the corresponding columns (a square submatrix);
1545   each processor's off-diagonal portion encompasses the remainder of the
1546   local matrix (a rectangular submatrix).
1547 
1548   If `o_rlen`, `d_rlen` are specified, then `o_rlenmax`, and `d_rlenmax` are ignored.
1549 
1550   When calling this routine with a single process communicator, a matrix of
1551   type `MATSEQSELL` is returned.  If a matrix of type `MATMPISELL` is desired for this
1552   type of communicator, use the construction mechanism
1553 .vb
1554    MatCreate(...,&A);
1555    MatSetType(A,MATMPISELL);
1556    MatSetSizes(A, m,n,M,N);
1557    MatMPISELLSetPreallocation(A,...);
1558 .ve
1559 
1560 .seealso: `Mat`, `MATSELL`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatMPISELLSetPreallocation()`, `MATMPISELL`
1561 @*/
1562 PetscErrorCode MatCreateSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[], Mat *A)
1563 {
1564   PetscMPIInt size;
1565 
1566   PetscFunctionBegin;
1567   PetscCall(MatCreate(comm, A));
1568   PetscCall(MatSetSizes(*A, m, n, M, N));
1569   PetscCallMPI(MPI_Comm_size(comm, &size));
1570   if (size > 1) {
1571     PetscCall(MatSetType(*A, MATMPISELL));
1572     PetscCall(MatMPISELLSetPreallocation(*A, d_rlenmax, d_rlen, o_rlenmax, o_rlen));
1573   } else {
1574     PetscCall(MatSetType(*A, MATSEQSELL));
1575     PetscCall(MatSeqSELLSetPreallocation(*A, d_rlenmax, d_rlen));
1576   }
1577   PetscFunctionReturn(PETSC_SUCCESS);
1578 }
1579 
1580 /*@C
1581   MatMPISELLGetSeqSELL - Returns the local pieces of this distributed matrix
1582 
1583   Not Collective
1584 
1585   Input Parameter:
1586 . A - the `MATMPISELL` matrix
1587 
1588   Output Parameters:
1589 + Ad     - The diagonal portion of `A`
1590 . Ao     - The off-diagonal portion of `A`
1591 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
1592 
1593   Level: advanced
1594 
1595 .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`
1596 @*/
1597 PetscErrorCode MatMPISELLGetSeqSELL(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
1598 {
1599   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1600   PetscBool    flg;
1601 
1602   PetscFunctionBegin;
1603   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &flg));
1604   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPISELL matrix as input");
1605   if (Ad) *Ad = a->A;
1606   if (Ao) *Ao = a->B;
1607   if (colmap) *colmap = a->garray;
1608   PetscFunctionReturn(PETSC_SUCCESS);
1609 }
1610 
1611 /*@C
1612   MatMPISELLGetLocalMatCondensed - Creates a `MATSEQSELL` matrix from an `MATMPISELL` matrix by
1613   taking all its local rows and NON-ZERO columns
1614 
1615   Not Collective
1616 
1617   Input Parameters:
1618 + A     - the matrix
1619 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
. row   - index set of rows to extract (or `NULL` for all local rows)
- col   - index set of columns to extract (or `NULL` for all nonzero columns)
1622 
1623   Output Parameter:
1624 . A_loc - the local sequential matrix generated
1625 
1626   Level: advanced
1627 
1628 .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`, `MatGetOwnershipRange()`, `MatMPISELLGetLocalMat()`
1629 @*/
PetscErrorCode MatMPISELLGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  PetscInt     i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS           isrowa, iscola;
  Mat         *aloc;
  PetscBool    match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPISELL matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row index set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column index set: every column with a local nonzero, assembled in
       increasing global order: off-diagonal columns left of the diagonal block
       (garray holds their global indices), then the diagonal block, then the
       remaining off-diagonal columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal column at/right of the diagonal block */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* the IS takes ownership of idx (PETSC_OWN_POINTER), so no explicit free */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MAT_REUSE_MATRIX: MatCreateSubMatrices() expects an existing matrix array;
       in the MAT_INITIAL_MATRIX case it allocates aloc itself */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* destroy only the index sets created here, not caller-supplied ones */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1679 
1680 #include <../src/mat/impls/aij/mpi/mpiaij.h>
1681 
/* Convert a MATMPISELL matrix to MATMPIAIJ by converting its diagonal and
   off-diagonal sequential blocks; supports initial, reuse, and in-place modes. */
PetscErrorCode MatConvert_MPISELL_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  Mat          B;
  Mat_MPIAIJ  *b;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ matrix with matching layout; its local blocks
       are replaced below by converted copies of A's blocks */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse: convert directly into B's existing local blocks */
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* NOTE(review): disassembly appears to restore global column numbering in
       A's off-diagonal block before conversion; A is re-assembled below — confirm */
    PetscCall(MatDisAssemble_MPISELL(A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place: B's contents take over A's header, destroying A's old data */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1725 
/* Convert a MATMPIAIJ matrix to MATMPISELL, preallocating the SELL blocks from
   the exact per-row nonzero counts of the AIJ blocks; supports initial, reuse,
   and in-place modes. */
PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ *)A->data;
  Mat          B;
  Mat_MPISELL *b;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* per-row nonzero counts come from the AIJ row pointer arrays (i[row+1]-i[row])
       of the diagonal (Aa) and off-diagonal (Ba) blocks */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)a->A->data, *Ba = (Mat_SeqAIJ *)a->B->data;
    PetscInt    i, d_nz = 0, o_nz = 0, m = A->rmap->N, n = A->cmap->N, lm = A->rmap->n, ln = A->cmap->n;
    PetscInt   *d_nnz, *o_nnz;
    PetscCall(PetscMalloc2(lm, &d_nnz, lm, &o_nnz));
    for (i = 0; i < lm; i++) {
      d_nnz[i] = Aa->i[i + 1] - Aa->i[i];
      o_nnz[i] = Ba->i[i + 1] - Ba->i[i];
      if (d_nnz[i] > d_nz) d_nz = d_nnz[i]; /* track maximum row lengths for rlenmax */
      if (o_nnz[i] > o_nz) o_nz = o_nnz[i];
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPISELL));
    PetscCall(MatSetSizes(B, lm, ln, m, n));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    /* both preallocations are attempted; only the one matching B's type takes effect */
    PetscCall(MatSeqSELLSetPreallocation(B, d_nz, d_nnz));
    PetscCall(MatMPISELLSetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }
  b = (Mat_MPISELL *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse: convert directly into B's existing local blocks */
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* replace B's freshly preallocated blocks with converted copies of A's blocks */
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  }

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place: B's contents take over A's header, destroying A's old data */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1779 
1780 PetscErrorCode MatSOR_MPISELL(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1781 {
1782   Mat_MPISELL *mat = (Mat_MPISELL *)matin->data;
1783   Vec          bb1 = NULL;
1784 
1785   PetscFunctionBegin;
1786   if (flag == SOR_APPLY_UPPER) {
1787     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1788     PetscFunctionReturn(PETSC_SUCCESS);
1789   }
1790 
1791   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1792 
1793   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1794     if (flag & SOR_ZERO_INITIAL_GUESS) {
1795       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1796       its--;
1797     }
1798 
1799     while (its--) {
1800       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1801       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1802 
1803       /* update rhs: bb1 = bb - B*x */
1804       PetscCall(VecScale(mat->lvec, -1.0));
1805       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1806 
1807       /* local sweep */
1808       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1809     }
1810   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1811     if (flag & SOR_ZERO_INITIAL_GUESS) {
1812       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1813       its--;
1814     }
1815     while (its--) {
1816       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1817       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1818 
1819       /* update rhs: bb1 = bb - B*x */
1820       PetscCall(VecScale(mat->lvec, -1.0));
1821       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1822 
1823       /* local sweep */
1824       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1825     }
1826   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1827     if (flag & SOR_ZERO_INITIAL_GUESS) {
1828       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1829       its--;
1830     }
1831     while (its--) {
1832       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1833       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1834 
1835       /* update rhs: bb1 = bb - B*x */
1836       PetscCall(VecScale(mat->lvec, -1.0));
1837       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1838 
1839       /* local sweep */
1840       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1841     }
1842   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1843 
1844   PetscCall(VecDestroy(&bb1));
1845 
1846   matin->factorerrortype = mat->A->factorerrortype;
1847   PetscFunctionReturn(PETSC_SUCCESS);
1848 }
1849 
1850 #if defined(PETSC_HAVE_CUDA)
1851 PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat, MatType, MatReuse, Mat *);
1852 #endif
1853 
1854 /*MC
   MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for parallel sparse matrices.
1856 
1857    Options Database Keys:
1858 . -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`
1859 
1860   Level: beginner
1861 
.seealso: `Mat`, `MATSELL`, `MATSEQSELL`, `MatCreateSELL()`
1863 M*/
/* Constructor for MATMPISELL: allocates the type-specific data structure,
   installs the operation table, and composes the type-specific methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPISELL(Mat B)
{
  Mat_MPISELL *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* operation table defined elsewhere in this file */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; see MatCreateColmap_MPISELL_Private() */
  b->garray      = NULL; /* local-to-global column map for the off-diagonal block */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* compose type-specific implementations dispatched by name (e.g. by PetscTryMethod) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpiaij_C", MatConvert_MPISELL_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpisellcuda_C", MatConvert_MPISELL_MPISELLCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPISELL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISELL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1907