xref: /petsc/src/mat/impls/sell/mpi/mpisell.c (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator,
12    and `MATMPISELL` otherwise.  As a result, for single process communicators,
13   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()`
19 
20   Level: beginner
21 
22 .seealso: `Mat`, `MATAIJ`, `MATBAIJ`, `MATSBAIJ`, `MatCreateSELL()`, `MatCreateSeqSELL()`, `MATSEQSELL`, `MATMPISELL`
23 M*/
24 
25 PetscErrorCode MatDiagonalSet_MPISELL(Mat Y, Vec D, InsertMode is)
26 {
27   Mat_MPISELL *sell = (Mat_MPISELL *)Y->data;
28 
29   PetscFunctionBegin;
30   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
31     PetscCall(MatDiagonalSet(sell->A, D, is));
32   } else {
33     PetscCall(MatDiagonalSet_Default(Y, D, is));
34   }
35   PetscFunctionReturn(PETSC_SUCCESS);
36 }
37 
/*
  Local utility routine that creates a mapping from the global column
  numbers to the local column numbers in the off-diagonal part of the
  local storage of the matrix.  When PETSC_USE_CTABLE is defined this is
  scalable, at a slightly higher hash-table lookup cost; without it, it is
  not scalable (each process stores an order-N integer array) but lookups
  are fast.
*/
45 PetscErrorCode MatCreateColmap_MPISELL_Private(Mat mat)
46 {
47   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
48   PetscInt     n    = sell->B->cmap->n, i;
49 
50   PetscFunctionBegin;
51   PetscCheck(sell->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPISELL Matrix was assembled but is missing garray");
52 #if defined(PETSC_USE_CTABLE)
53   PetscCall(PetscHMapICreateWithSize(n, &sell->colmap));
54   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(sell->colmap, sell->garray[i] + 1, i + 1));
55 #else
56   PetscCall(PetscCalloc1(mat->cmap->N + 1, &sell->colmap));
57   for (i = 0; i < n; i++) sell->colmap[sell->garray[i]] = i + 1;
58 #endif
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
/*
  Insert or add one value at local (row, col) of the diagonal block A, a
  Mat_SeqSELL stored in sliced-ELLPACK layout: within a slice the entries
  of a row are strided by sliceheight, so element _i of the row lives at
  cp1[sliceheight * _i] / vp1[sliceheight * _i].  The caller-scope state
  low1/high1/lastcol1 caches the search window to speed up repeated
  insertions into the same row with nondecreasing column indices.  A new
  nonzero triggers a slice reallocation unless forbidden by nonew; orow
  and ocol are the global indices, used only in error messages.
*/
#define MatSetValues_SeqSELL_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* bisect until the window is small, then scan linearly */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (cp1[sliceheight * t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (cp1[sliceheight * _i] > col) break; \
      if (cp1[sliceheight * _i] == col) { \
        if (addv == ADD_VALUES) vp1[sliceheight * _i] += value; \
        else vp1[sliceheight * _i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    /* new location: skip explicit zeros if requested, or skip silently when nonew == 1 */ \
    if (value == 0.0 && ignorezeroentries) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXSELLReallocateSELL(A, am, 1, nrow1, a->sliidx, a->sliceheight, row / sliceheight, row, col, a->colidx, a->val, cp1, vp1, nonew, MatScalar); \
    /* shift up all the later entries in this row */ \
    for (ii = nrow1 - 1; ii >= _i; ii--) { \
      cp1[sliceheight * (ii + 1)] = cp1[sliceheight * ii]; \
      vp1[sliceheight * (ii + 1)] = vp1[sliceheight * ii]; \
    } \
    cp1[sliceheight * _i] = col; \
    vp1[sliceheight * _i] = value; \
    a->nz++; \
    nrow1++; \
    A->nonzerostate++; \
  a_noinsert:; \
    a->rlen[row] = nrow1; \
  }
106 
/*
  Same as MatSetValues_SeqSELL_A_Private, but for the off-diagonal block B
  with its own caller-scope search state (cp2/vp2/low2/high2/lastcol2) and
  landing label b_noinsert.
*/
#define MatSetValues_SeqSELL_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* bisect until the window is small, then scan linearly */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (cp2[sliceheight * t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (cp2[sliceheight * _i] > col) break; \
      if (cp2[sliceheight * _i] == col) { \
        if (addv == ADD_VALUES) vp2[sliceheight * _i] += value; \
        else vp2[sliceheight * _i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    /* new location: skip explicit zeros if requested, or skip silently when nonew == 1 */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXSELLReallocateSELL(B, bm, 1, nrow2, b->sliidx, b->sliceheight, row / sliceheight, row, col, b->colidx, b->val, cp2, vp2, nonew, MatScalar); \
    /* shift up all the later entries in this row */ \
    for (ii = nrow2 - 1; ii >= _i; ii--) { \
      cp2[sliceheight * (ii + 1)] = cp2[sliceheight * ii]; \
      vp2[sliceheight * (ii + 1)] = vp2[sliceheight * ii]; \
    } \
    cp2[sliceheight * _i] = col; \
    vp2[sliceheight * _i] = value; \
    b->nz++; \
    nrow2++; \
    B->nonzerostate++; \
  b_noinsert:; \
    b->rlen[row] = nrow2; \
  }
151 
/*
  MatSetValues for MATMPISELL: values destined for locally owned rows are
  inserted immediately into the diagonal (A) or off-diagonal (B) SeqSELL
  block; values for off-process rows are stashed and exchanged later during
  MatAssemblyBegin()/MatAssemblyEnd().  Negative row or column indices are
  silently ignored.  v is interpreted row-major or column-major depending
  on sell->roworiented.
*/
PetscErrorCode MatSetValues_MPISELL(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscScalar  value;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, shift1, shift2;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool    roworiented = sell->roworiented;

  /* Some Variables required in the macro */
  Mat          A                 = sell->A;
  Mat_SeqSELL *a                 = (Mat_SeqSELL *)A->data;
  PetscBool    ignorezeroentries = a->ignorezeroentries, found; /* found is written by MatSetValue_SeqSELL_Private(); presumably PETSC_TRUE when a value was stored -- confirm against that macro */
  Mat          B                 = sell->B;
  Mat_SeqSELL *b                 = (Mat_SeqSELL *)B->data;
  PetscInt    *cp1, *cp2, ii, _i, nrow1, nrow2, low1, high1, low2, high2, t, lastcol1, lastcol2, sliceheight = a->sliceheight;
  MatScalar   *vp1, *vp2;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* row owned by this process */
      row      = im[i] - rstart;
      lastcol1 = -1;
      shift1   = a->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
      cp1      = a->colidx + shift1;
      vp1      = a->val + shift1;
      nrow1    = a->rlen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      shift2   = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
      cp2      = b->colidx + shift2;
      vp2      = b->val + shift2;
      nrow2    = b->rlen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
          col = in[j] - cstart;
          MatSetValue_SeqSELL_Private(A, row, col, value, addv, im[i], in[j], cp1, vp1, lastcol1, low1, high1); /* set one value */
#if defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* once assembled, off-diagonal columns are stored compacted; map the
               global column through colmap (entries are local index + 1, 0 = absent) */
            if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(sell->colmap, in[j] + 1, 0, &col));
            col--;
#else
            col = sell->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqSELL *)(sell->B->data))->nonew) {
              /* column not present in B: expand B back to global column indexing */
              PetscCall(MatDisAssemble_MPISELL(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSELL_B_Private() */
              B      = sell->B;
              b      = (Mat_SeqSELL *)B->data;
              shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */
              cp2    = b->colidx + shift2;
              vp2    = b->val + shift2;
              nrow2  = b->rlen[row];
              low2   = 0;
              high2  = nrow2;
              found  = PETSC_FALSE;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B is indexed by global columns */
          MatSetValue_SeqSELL_Private(B, row, col, value, addv, im[i], in[j], cp2, vp2, lastcol2, low2, high2); /* set one value */
#if defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else { /* off-process row: stash for later communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!sell->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
249 
/*
  MatGetValues for MATMPISELL.  Only locally owned rows may be queried
  (off-process rows raise PETSC_ERR_SUP).  Columns in the diagonal range
  are read from A; other columns are mapped through colmap into the
  off-diagonal block B, and absent entries are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPISELL(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* diagonal block: local column = global column - cstart */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(sell->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: map global column to local index (0 lookup -> -1 = absent) */
          if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(sell->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = sell->colmap[idxn[j]] - 1;
#endif
          /* the garray cross-check guards against a colmap that no longer matches B -- presumably after disassembly; confirm */
          if ((col < 0) || (sell->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(sell->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
284 
285 extern PetscErrorCode MatMultDiagonalBlock_MPISELL(Mat, Vec, Vec);
286 
287 PetscErrorCode MatAssemblyBegin_MPISELL(Mat mat, MatAssemblyType mode)
288 {
289   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
290   PetscInt     nstash, reallocs;
291 
292   PetscFunctionBegin;
293   if (sell->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
294 
295   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
296   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
297   PetscCall(PetscInfo(sell->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
/*
  Completes assembly of a MATMPISELL matrix: drains the off-process entry
  stash into the local blocks, assembles the diagonal (A) and off-diagonal
  (B) sequential blocks, performs the collective disassembly bookkeeping,
  and on the first final assembly builds the scatter used by MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPISELL(Mat mat, MatAssemblyType mode)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, flg;
  PetscInt    *row, *col;
  PetscScalar *val;
  PetscBool    other_disassembled;
  /* do not use 'b = (Mat_SeqSELL*)sell->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!sell->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other processes until the stash is drained */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n; i++) { /* assemble one by one */
        PetscCall(MatSetValues_MPISELL(mat, 1, row + i, 1, col + i, val + i, mat->insertmode));
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) sell->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(sell->A, mode));
  PetscCall(MatAssemblyEnd(sell->A, mode));

  /*
     determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqSELL *)sell->B->data)->nonew) {
    /* other_disassembled is TRUE only if every rank was previously assembled;
       if this rank was assembled but some rank was not, disassemble here too */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISELL(mat));
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISELL(mat));
#if defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && sell->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) sell->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(sell->B, mode));
  PetscCall(MatAssemblyEnd(sell->B, mode));
  /* cached row workspace and diagonal are stale after assembly */
  PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
  sell->rowvalues = NULL;
  PetscCall(VecDestroy(&sell->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSELL *)(sell->A->data))->nonew) {
    PetscObjectState state = sell->A->nonzerostate + sell->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
360 
361 PetscErrorCode MatZeroEntries_MPISELL(Mat A)
362 {
363   Mat_MPISELL *l = (Mat_MPISELL *)A->data;
364 
365   PetscFunctionBegin;
366   PetscCall(MatZeroEntries(l->A));
367   PetscCall(MatZeroEntries(l->B));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 
371 PetscErrorCode MatMult_MPISELL(Mat A, Vec xx, Vec yy)
372 {
373   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
374   PetscInt     nt;
375 
376   PetscFunctionBegin;
377   PetscCall(VecGetLocalSize(xx, &nt));
378   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
379   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
380   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
381   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
382   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 PetscErrorCode MatMultDiagonalBlock_MPISELL(Mat A, Vec bb, Vec xx)
387 {
388   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
389 
390   PetscFunctionBegin;
391   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
392   PetscFunctionReturn(PETSC_SUCCESS);
393 }
394 
395 PetscErrorCode MatMultAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
396 {
397   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
398 
399   PetscFunctionBegin;
400   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
401   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
402   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
403   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
404   PetscFunctionReturn(PETSC_SUCCESS);
405 }
406 
407 PetscErrorCode MatMultTranspose_MPISELL(Mat A, Vec xx, Vec yy)
408 {
409   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
410 
411   PetscFunctionBegin;
412   /* do nondiagonal part */
413   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
414   /* do local part */
415   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
416   /* add partial results together */
417   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
418   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
419   PetscFunctionReturn(PETSC_SUCCESS);
420 }
421 
/*
  Tests whether Bmat equals the transpose of Amat (to tolerance tol).
  Cheap test first: the local diagonal blocks must be mutual transposes.
  If that passes and there is more than one process, the off-diagonal
  parts are compared via two MatCreateSubMatrix extractions.
*/
PetscErrorCode MatIsTranspose_MPISELL(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm     comm;
  Mat_MPISELL *Asell = (Mat_MPISELL *)Amat->data, *Bsell;
  Mat          Adia  = Asell->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS           Me, Notme;
  PetscInt     M, N, first, last, *notme, i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bsell = (Mat_MPISELL *)Bmat->data;
  Bdia  = Bsell->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, f));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  /* notme lists the global indices outside this rank's ownership range.
     NOTE(review): the array is allocated with N - last + first entries but the
     second fill loop runs to M; this is consistent only when M == N (the square
     case, where row and column partitions coincide) -- confirm for rectangular. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  /* B's off-part is extracted with rows/columns swapped relative to A's */
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
461 
462 PetscErrorCode MatMultTransposeAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz)
463 {
464   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
465 
466   PetscFunctionBegin;
467   /* do nondiagonal part */
468   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
469   /* do local part */
470   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
471   /* add partial results together */
472   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
473   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 /*
478   This only works correctly for square matrices where the subblock A->A is the
479    diagonal block
480 */
481 PetscErrorCode MatGetDiagonal_MPISELL(Mat A, Vec v)
482 {
483   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
484 
485   PetscFunctionBegin;
486   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
487   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
488   PetscCall(MatGetDiagonal(a->A, v));
489   PetscFunctionReturn(PETSC_SUCCESS);
490 }
491 
492 PetscErrorCode MatScale_MPISELL(Mat A, PetscScalar aa)
493 {
494   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
495 
496   PetscFunctionBegin;
497   PetscCall(MatScale(a->A, aa));
498   PetscCall(MatScale(a->B, aa));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501 
/*
  Destroys a MATMPISELL matrix: releases the sequential blocks, the
  column map, the ghost-column array, the communication scatter, cached
  workspaces, and finally detaches all composed methods from the object.
*/
PetscErrorCode MatDestroy_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&sell->diag));
  PetscCall(MatDestroy(&sell->A));
  PetscCall(MatDestroy(&sell->B));
  /* colmap has a different representation depending on PETSC_USE_CTABLE */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&sell->colmap));
#else
  PetscCall(PetscFree(sell->colmap));
#endif
  PetscCall(PetscFree(sell->garray));
  PetscCall(VecDestroy(&sell->lvec));
  PetscCall(VecScatterDestroy(&sell->Mvctx));
  PetscCall(PetscFree2(sell->rowvalues, sell->rowindices));
  PetscCall(PetscFree(sell->ld));
  PetscCall(PetscFree(mat->data));

  /* reset the type name and remove all composed methods registered at creation */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISELLSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpisellcuda_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
538 
539 #include <petscdraw.h>
/*
  Viewer backend shared by the ASCII, draw, binary, and socket viewers.
  The ASCII INFO formats print per-process statistics and return early;
  otherwise the whole matrix is gathered onto rank 0 (as a temporary
  MATMPISELL with all rows on rank 0) and viewed there sequentially.
*/
PetscErrorCode MatView_MPISELL_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISELL      *sell = (Mat_MPISELL *)mat->data;
  PetscMPIInt       rank = sell->rank, size = sell->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* synchronized per-process summary: local sizes, nonzeros, memory */
      MatInfo   info;
      PetscInt *inodes;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(sell->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
                                                     (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used,
                                                     (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      }
      PetscCall(MatGetInfo(sell->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(sell->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(sell->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(sell->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)sell->A, ((PetscObject)mat)->name));
      PetscCall(MatView(sell->A, viewer));
    } else {
      /* PetscCall(MatView_MPISELL_Binary(mat,viewer)); */
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSELL *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *acolidx, row, col, i, j;
    MatScalar   *aval;
    PetscBool    isnonzero;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* This is just a temporary matrix, so explicitly using MATMPISELL is probably best */
    PetscCall(MatSetType(A, MATMPISELL));
    PetscCall(MatMPISELLSetPreallocation(A, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc    = (Mat_SeqSELL *)sell->A->data;
    acolidx = Aloc->colidx;
    aval    = Aloc->val;
    for (i = 0; i < Aloc->totalslices; i++) { /* loop over slices */
      for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
        /* slot j is a stored entry only if its position within the slice is below the row's length */
        isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
        if (isnonzero) { /* check the mask bit */
          row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
          /* NOTE(review): the column offset uses rmap->rstart; cmap->rstart would be
             expected for the diagonal block -- equivalent only when the row and column
             partitions coincide (square case); confirm for rectangular layouts */
          col = *acolidx + mat->rmap->rstart;
          PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
        }
        aval++;
        acolidx++;
      }
    }

    /* copy over the B part */
    Aloc    = (Mat_SeqSELL *)sell->B->data;
    acolidx = Aloc->colidx;
    aval    = Aloc->val;
    for (i = 0; i < Aloc->totalslices; i++) {
      for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) {
        isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]);
        if (isnonzero) {
          row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart;
          /* off-diagonal columns are local; garray maps them back to global numbers */
          col = sell->garray[*acolidx];
          PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES));
        }
        aval++;
        acolidx++;
      }
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISELL *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqSELL(((Mat_MPISELL *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
676 
677 PetscErrorCode MatView_MPISELL(Mat mat, PetscViewer viewer)
678 {
679   PetscBool iascii, isdraw, issocket, isbinary;
680 
681   PetscFunctionBegin;
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
683   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
684   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
685   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
686   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPISELL_ASCIIorDraworSocket(mat, viewer));
687   PetscFunctionReturn(PETSC_SUCCESS);
688 }
689 
690 PetscErrorCode MatGetGhosts_MPISELL(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
691 {
692   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
693 
694   PetscFunctionBegin;
695   PetscCall(MatGetSize(sell->B, NULL, nghosts));
696   if (ghosts) *ghosts = sell->garray;
697   PetscFunctionReturn(PETSC_SUCCESS);
698 }
699 
700 PetscErrorCode MatGetInfo_MPISELL(Mat matin, MatInfoType flag, MatInfo *info)
701 {
702   Mat_MPISELL   *mat = (Mat_MPISELL *)matin->data;
703   Mat            A = mat->A, B = mat->B;
704   PetscLogDouble isend[5], irecv[5];
705 
706   PetscFunctionBegin;
707   info->block_size = 1.0;
708   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
709 
710   isend[0] = info->nz_used;
711   isend[1] = info->nz_allocated;
712   isend[2] = info->nz_unneeded;
713   isend[3] = info->memory;
714   isend[4] = info->mallocs;
715 
716   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
717 
718   isend[0] += info->nz_used;
719   isend[1] += info->nz_allocated;
720   isend[2] += info->nz_unneeded;
721   isend[3] += info->memory;
722   isend[4] += info->mallocs;
723   if (flag == MAT_LOCAL) {
724     info->nz_used      = isend[0];
725     info->nz_allocated = isend[1];
726     info->nz_unneeded  = isend[2];
727     info->memory       = isend[3];
728     info->mallocs      = isend[4];
729   } else if (flag == MAT_GLOBAL_MAX) {
730     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
731 
732     info->nz_used      = irecv[0];
733     info->nz_allocated = irecv[1];
734     info->nz_unneeded  = irecv[2];
735     info->memory       = irecv[3];
736     info->mallocs      = irecv[4];
737   } else if (flag == MAT_GLOBAL_SUM) {
738     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
739 
740     info->nz_used      = irecv[0];
741     info->nz_allocated = irecv[1];
742     info->nz_unneeded  = irecv[2];
743     info->memory       = irecv[3];
744     info->mallocs      = irecv[4];
745   }
746   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
747   info->fill_ratio_needed = 0;
748   info->factor_mallocs    = 0;
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 PetscErrorCode MatSetOption_MPISELL(Mat A, MatOption op, PetscBool flg)
753 {
754   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
755 
756   PetscFunctionBegin;
757   switch (op) {
758   case MAT_NEW_NONZERO_LOCATIONS:
759   case MAT_NEW_NONZERO_ALLOCATION_ERR:
760   case MAT_UNUSED_NONZERO_LOCATION_ERR:
761   case MAT_KEEP_NONZERO_PATTERN:
762   case MAT_NEW_NONZERO_LOCATION_ERR:
763   case MAT_USE_INODES:
764   case MAT_IGNORE_ZERO_ENTRIES:
765     MatCheckPreallocated(A, 1);
766     PetscCall(MatSetOption(a->A, op, flg));
767     PetscCall(MatSetOption(a->B, op, flg));
768     break;
769   case MAT_ROW_ORIENTED:
770     MatCheckPreallocated(A, 1);
771     a->roworiented = flg;
772 
773     PetscCall(MatSetOption(a->A, op, flg));
774     PetscCall(MatSetOption(a->B, op, flg));
775     break;
776   case MAT_FORCE_DIAGONAL_ENTRIES:
777   case MAT_SORTED_FULL:
778     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
779     break;
780   case MAT_IGNORE_OFF_PROC_ENTRIES:
781     a->donotstash = flg;
782     break;
783   case MAT_SPD:
784   case MAT_SPD_ETERNAL:
785     break;
786   case MAT_SYMMETRIC:
787     MatCheckPreallocated(A, 1);
788     PetscCall(MatSetOption(a->A, op, flg));
789     break;
790   case MAT_STRUCTURALLY_SYMMETRIC:
791     MatCheckPreallocated(A, 1);
792     PetscCall(MatSetOption(a->A, op, flg));
793     break;
794   case MAT_HERMITIAN:
795     MatCheckPreallocated(A, 1);
796     PetscCall(MatSetOption(a->A, op, flg));
797     break;
798   case MAT_SYMMETRY_ETERNAL:
799     MatCheckPreallocated(A, 1);
800     PetscCall(MatSetOption(a->A, op, flg));
801     break;
802   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
803     MatCheckPreallocated(A, 1);
804     PetscCall(MatSetOption(a->A, op, flg));
805     break;
806   default:
807     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
808   }
809   PetscFunctionReturn(PETSC_SUCCESS);
810 }
811 
/* Scale mat as diag(ll) * mat * diag(rr); either vector may be NULL.
   ll conforms to the local rows of mat, rr to the local columns.
   NOTE: the statement order is load-bearing -- the scatter of rr is started
   first so the left/diagonal scaling overlaps the communication. */
PetscErrorCode MatDiagonalScale_MPISELL(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  Mat          a = sell->A, b = sell->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling acts on whole local rows, so the off-diagonal block can be
       scaled immediately with the local ll -- no communication needed */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, sell->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
841 
842 PetscErrorCode MatSetUnfactored_MPISELL(Mat A)
843 {
844   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
845 
846   PetscFunctionBegin;
847   PetscCall(MatSetUnfactored(a->A));
848   PetscFunctionReturn(PETSC_SUCCESS);
849 }
850 
851 PetscErrorCode MatEqual_MPISELL(Mat A, Mat B, PetscBool *flag)
852 {
853   Mat_MPISELL *matB = (Mat_MPISELL *)B->data, *matA = (Mat_MPISELL *)A->data;
854   Mat          a, b, c, d;
855   PetscBool    flg;
856 
857   PetscFunctionBegin;
858   a = matA->A;
859   b = matA->B;
860   c = matB->A;
861   d = matB->B;
862 
863   PetscCall(MatEqual(a, c, &flg));
864   if (flg) PetscCall(MatEqual(b, d, &flg));
865   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
866   PetscFunctionReturn(PETSC_SUCCESS);
867 }
868 
869 PetscErrorCode MatCopy_MPISELL(Mat A, Mat B, MatStructure str)
870 {
871   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
872   Mat_MPISELL *b = (Mat_MPISELL *)B->data;
873 
874   PetscFunctionBegin;
875   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
876   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
877     /* because of the column compression in the off-processor part of the matrix a->B,
878        the number of columns in a->B and b->B may be different, hence we cannot call
879        the MatCopy() directly on the two parts. If need be, we can provide a more
880        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
881        then copying the submatrices */
882     PetscCall(MatCopy_Basic(A, B, str));
883   } else {
884     PetscCall(MatCopy(a->A, b->A, str));
885     PetscCall(MatCopy(a->B, b->B, str));
886   }
887   PetscFunctionReturn(PETSC_SUCCESS);
888 }
889 
/* Default setup: preallocate with default parameters when the user never
   called a preallocation routine explicitly. */
PetscErrorCode MatSetUp_MPISELL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPISELLSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
896 
897 extern PetscErrorCode MatConjugate_SeqSELL(Mat);
898 
899 PetscErrorCode MatConjugate_MPISELL(Mat mat)
900 {
901   PetscFunctionBegin;
902   if (PetscDefined(USE_COMPLEX)) {
903     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
904 
905     PetscCall(MatConjugate_SeqSELL(sell->A));
906     PetscCall(MatConjugate_SeqSELL(sell->B));
907   }
908   PetscFunctionReturn(PETSC_SUCCESS);
909 }
910 
911 PetscErrorCode MatRealPart_MPISELL(Mat A)
912 {
913   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
914 
915   PetscFunctionBegin;
916   PetscCall(MatRealPart(a->A));
917   PetscCall(MatRealPart(a->B));
918   PetscFunctionReturn(PETSC_SUCCESS);
919 }
920 
921 PetscErrorCode MatImaginaryPart_MPISELL(Mat A)
922 {
923   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
924 
925   PetscFunctionBegin;
926   PetscCall(MatImaginaryPart(a->A));
927   PetscCall(MatImaginaryPart(a->B));
928   PetscFunctionReturn(PETSC_SUCCESS);
929 }
930 
931 PetscErrorCode MatInvertBlockDiagonal_MPISELL(Mat A, const PetscScalar **values)
932 {
933   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
934 
935   PetscFunctionBegin;
936   PetscCall(MatInvertBlockDiagonal(a->A, values));
937   A->factorerrortype = a->A->factorerrortype;
938   PetscFunctionReturn(PETSC_SUCCESS);
939 }
940 
941 static PetscErrorCode MatSetRandom_MPISELL(Mat x, PetscRandom rctx)
942 {
943   Mat_MPISELL *sell = (Mat_MPISELL *)x->data;
944 
945   PetscFunctionBegin;
946   PetscCall(MatSetRandom(sell->A, rctx));
947   PetscCall(MatSetRandom(sell->B, rctx));
948   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
949   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
950   PetscFunctionReturn(PETSC_SUCCESS);
951 }
952 
/* Process command-line options for a MATMPISELL matrix. No type-specific
   options are registered at present; the Head begin/end pair is kept so the
   (empty) section exists for future options. */
PetscErrorCode MatSetFromOptions_MPISELL(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPISELL options");
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
960 
/* Y += a*I. Ensures the diagonal block has room for the diagonal entries
   before delegating to the generic shift. */
PetscErrorCode MatShift_MPISELL(Mat Y, PetscScalar a)
{
  Mat_MPISELL *msell = (Mat_MPISELL *)Y->data;
  Mat_SeqSELL *sell  = (Mat_SeqSELL *)msell->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one entry per local row (the diagonal) */
    PetscCall(MatMPISELLSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!sell->nz) {
    /* preallocated but the diagonal block holds no nonzeros: re-preallocate
       it, preserving the nonew flag across the call */
    PetscInt nonew = sell->nonew;
    PetscCall(MatSeqSELLSetPreallocation(msell->A, 1, NULL));
    sell->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
977 
978 PetscErrorCode MatMissingDiagonal_MPISELL(Mat A, PetscBool *missing, PetscInt *d)
979 {
980   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
981 
982   PetscFunctionBegin;
983   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
984   PetscCall(MatMissingDiagonal(a->A, missing, d));
985   if (d) {
986     PetscInt rstart;
987     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
988     *d += rstart;
989   }
990   PetscFunctionReturn(PETSC_SUCCESS);
991 }
992 
993 PetscErrorCode MatGetDiagonalBlock_MPISELL(Mat A, Mat *a)
994 {
995   PetscFunctionBegin;
996   *a = ((Mat_MPISELL *)A->data)->A;
997   PetscFunctionReturn(PETSC_SUCCESS);
998 }
999 
/* Positional dispatch table for MATMPISELL: each slot corresponds to an entry
   of struct _MatOps (slot numbers appear in the interspersed comments);
   NULL marks an operation this type does not implement. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISELL,
                                       NULL,
                                       NULL,
                                       MatMult_MPISELL,
                                       /* 4*/ MatMultAdd_MPISELL,
                                       MatMultTranspose_MPISELL,
                                       MatMultTransposeAdd_MPISELL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISELL,
                                       NULL,
                                       /*15*/ MatGetInfo_MPISELL,
                                       MatEqual_MPISELL,
                                       MatGetDiagonal_MPISELL,
                                       MatDiagonalScale_MPISELL,
                                       NULL,
                                       /*20*/ MatAssemblyBegin_MPISELL,
                                       MatAssemblyEnd_MPISELL,
                                       MatSetOption_MPISELL,
                                       MatZeroEntries_MPISELL,
                                       /*24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPISELL,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISELL,
                                       NULL,
                                       /*34*/ MatDuplicate_MPISELL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetValues_MPISELL,
                                       MatCopy_MPISELL,
                                       /*44*/ NULL,
                                       MatScale_MPISELL,
                                       MatShift_MPISELL,
                                       MatDiagonalSet_MPISELL,
                                       NULL,
                                       /*49*/ MatSetRandom_MPISELL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPISELL,
                                       NULL,
                                       NULL,
                                       /*59*/ NULL,
                                       MatDestroy_MPISELL,
                                       MatView_MPISELL,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ, /* reuse AIJ function */
                                       MatSetFromOptions_MPISELL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISELL,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPISELL,
                                       MatImaginaryPart_MPISELL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISELL,
                                       /*114*/ NULL,
                                       NULL,
                                       MatGetGhosts_MPISELL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       MatInvertBlockDiagonal_MPISELL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1152 
1153 PetscErrorCode MatStoreValues_MPISELL(Mat mat)
1154 {
1155   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
1156 
1157   PetscFunctionBegin;
1158   PetscCall(MatStoreValues(sell->A));
1159   PetscCall(MatStoreValues(sell->B));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 PetscErrorCode MatRetrieveValues_MPISELL(Mat mat)
1164 {
1165   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCall(MatRetrieveValues(sell->A));
1169   PetscCall(MatRetrieveValues(sell->B));
1170   PetscFunctionReturn(PETSC_SUCCESS);
1171 }
1172 
/* Type-specific preallocation: on first call creates the sequential diagonal
   block b->A and off-diagonal block b->B, then preallocates both. d_rlen and
   o_rlen may be NULL, in which case d_rlenmax / o_rlenmax are used for every
   local row. */
PetscErrorCode MatMPISELLSetPreallocation_MPISELL(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *b;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPISELL *)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQSELL matrices. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    /* diagonal block: local rows x local columns */
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQSELL));
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    /* off-diagonal block: local rows x GLOBAL columns (its columns get
       compressed at assembly time) */
    PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
    PetscCall(MatSetType(b->B, MATSEQSELL));
  }

  PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  /*
    critical for MatAssemblyEnd to work.
    MatAssemblyBegin checks it to set up was_assembled
    and MatAssemblyEnd checks was_assembled to determine whether to build garray
  */
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1206 
/* Duplicate matin into *newmat: copies the layout, the parallel support data
   (colmap, garray, lvec, Mvctx) and both local blocks; whether numerical
   values are copied is controlled by cpvalues. */
PetscErrorCode MatDuplicate_MPISELL(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPISELL *a, *oldmat = (Mat_MPISELL *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPISELL *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* row-access scratch state is deliberately NOT copied; it is per-call data */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share (reference, not copy) the layouts */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the local-to-global column map of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1259 
1260 /*@C
1261    MatMPISELLSetPreallocation - Preallocates memory for a `MATMPISELL` sparse parallel matrix in sell format.
1262    For good matrix assembly performance the user should preallocate the matrix storage by
1263    setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
1264 
1265    Collective
1266 
1267    Input Parameters:
1268 +  B - the matrix
1269 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1270            (same value is used for all local rows)
1271 .  d_nnz - array containing the number of nonzeros in the various rows of the
1272            DIAGONAL portion of the local submatrix (possibly different for each row)
1273            or NULL (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
1274            The size of this array is equal to the number of local rows, i.e 'm'.
1275            For matrices that will be factored, you must leave room for (and set)
1276            the diagonal entry even if it is zero.
1277 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1278            submatrix (same value is used for all local rows).
1279 -  o_nnz - array containing the number of nonzeros in the various rows of the
1280            OFF-DIAGONAL portion of the local submatrix (possibly different for
1281            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
1282            structure. The size of this array is equal to the number
1283            of local rows, i.e 'm'.
1284 
1285    Example usage:
1286    Consider the following 8x8 matrix with 34 non-zero values, that is
1287    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1288    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1289    as follows
1290 
1291 .vb
1292             1  2  0  |  0  3  0  |  0  4
1293     Proc0   0  5  6  |  7  0  0  |  8  0
1294             9  0 10  | 11  0  0  | 12  0
1295     -------------------------------------
1296            13  0 14  | 15 16 17  |  0  0
1297     Proc1   0 18  0  | 19 20 21  |  0  0
1298             0  0  0  | 22 23  0  | 24  0
1299     -------------------------------------
1300     Proc2  25 26 27  |  0  0 28  | 29  0
1301            30  0  0  | 31 32 33  |  0 34
1302 .ve
1303 
1304    This can be represented as a collection of submatrices as
1305 
1306 .vb
1307       A B C
1308       D E F
1309       G H I
1310 .ve
1311 
1312    Where the submatrices A,B,C are owned by proc0, D,E,F are
1313    owned by proc1, G,H,I are owned by proc2.
1314 
1315    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1316    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1317    The 'M','N' parameters are 8,8, and have the same values on all procs.
1318 
1319    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1320    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1321    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1322    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1323    part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
   matrix, and [DF] as another SeqSELL matrix.
1325 
1326    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
1327    allocated for every row of the local diagonal submatrix, and o_nz
1328    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
1330    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1331    In this case, the values of d_nz,o_nz are
1332 .vb
1333      proc0  dnz = 2, o_nz = 2
1334      proc1  dnz = 3, o_nz = 2
1335      proc2  dnz = 1, o_nz = 4
1336 .ve
1337    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1338    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
1340    34 values.
1341 
1342    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
1343    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1344    In the above case the values for d_nnz,o_nnz are
1345 .vb
1346      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
1347      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
1348      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
1349 .ve
1350    Here the space allocated is according to nz (or maximum values in the nnz
1351    if nnz is provided) for DIAGONAL and OFF-DIAGONAL submatrices, i.e (2+2+3+2)*3+(1+4)*2=37
1352 
1353    Level: intermediate
1354 
1355    Notes:
1356    If the *_nnz parameter is given then the *_nz parameter is ignored
1357 
1358    The stored row and column indices begin with zero.
1359 
1360    The parallel matrix is partitioned such that the first m0 rows belong to
1361    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1362    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1363 
1364    The DIAGONAL portion of the local submatrix of a processor can be defined
1365    as the submatrix which is obtained by extraction the part corresponding to
1366    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
1367    first row that belongs to the processor, r2 is the last row belonging to
1368    the this processor, and c1-c2 is range of indices of the local part of a
1369    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
1370    common case of a square matrix, the row and column ranges are the same and
1371    the DIAGONAL part is also square. The remaining portion of the local
1372    submatrix (mxN) constitute the OFF-DIAGONAL portion.
1373 
1374    If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
1375 
1376    You can call `MatGetInfo()` to get information on how effective the preallocation was;
1377    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1378    You can also run with the option -info and look for messages with the string
1379    malloc in them to see if additional memory allocation was needed.
1380 
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatCreateSELL()`,
1382           `MATMPISELL`, `MatGetInfo()`, `PetscSplitOwnership()`, `MATSELL`
1383 @*/
PetscErrorCode MatMPISELLSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation composed on B;
     PetscTryMethod is a silent no-op when the matrix type did not compose
     "MatMPISELLSetPreallocation_C" */
  PetscTryMethod(B, "MatMPISELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1392 
1393 /*MC
1394    MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for MPI sparse matrices,
1395    based on the sliced Ellpack format
1396 
1397    Options Database Key:
. -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`
1399 
1400    Level: beginner
1401 
.seealso: `Mat`, `MatCreateSELL()`, `MATSEQSELL`, `MATSELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ`
1403 M*/
1404 
1405 /*@C
1406    MatCreateSELL - Creates a sparse parallel matrix in `MATSELL` format.
1407 
1408    Collective
1409 
1410    Input Parameters:
1411 +  comm - MPI communicator
1412 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
1413            This value should be the same as the local size used in creating the
1414            y vector for the matrix-vector product y = Ax.
1415 .  n - This value should be the same as the local size used in creating the
1416        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
1417        calculated if `N` is given) For square matrices n is almost always `m`.
1418 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1419 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1420 .  d_rlenmax - max number of nonzeros per row in DIAGONAL portion of local submatrix
1421                (same value is used for all local rows)
1422 .  d_rlen - array containing the number of nonzeros in the various rows of the
1423             DIAGONAL portion of the local submatrix (possibly different for each row)
1424             or `NULL`, if d_rlenmax is used to specify the nonzero structure.
1425             The size of this array is equal to the number of local rows, i.e `m`.
1426 .  o_rlenmax - max number of nonzeros per row in the OFF-DIAGONAL portion of local
1427                submatrix (same value is used for all local rows).
1428 -  o_rlen - array containing the number of nonzeros in the various rows of the
1429             OFF-DIAGONAL portion of the local submatrix (possibly different for
1430             each row) or `NULL`, if `o_rlenmax` is used to specify the nonzero
1431             structure. The size of this array is equal to the number
1432             of local rows, i.e `m`.
1433 
1434    Output Parameter:
1435 .  A - the matrix
1436 
1437    Options Database Key:
1438 -  -mat_sell_oneindex - Internally use indexing starting at 1
1439         rather than 0.  When calling `MatSetValues()`,
1440         the user still MUST index entries starting at 0!
1441 
1442    Example:
1443    Consider the following 8x8 matrix with 34 non-zero values, that is
1444    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1445    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1446    as follows
1447 
1448 .vb
1449             1  2  0  |  0  3  0  |  0  4
1450     Proc0   0  5  6  |  7  0  0  |  8  0
1451             9  0 10  | 11  0  0  | 12  0
1452     -------------------------------------
1453            13  0 14  | 15 16 17  |  0  0
1454     Proc1   0 18  0  | 19 20 21  |  0  0
1455             0  0  0  | 22 23  0  | 24  0
1456     -------------------------------------
1457     Proc2  25 26 27  |  0  0 28  | 29  0
1458            30  0  0  | 31 32 33  |  0 34
1459 .ve
1460 
1461    This can be represented as a collection of submatrices as
1462 .vb
1463       A B C
1464       D E F
1465       G H I
1466 .ve
1467 
1468    Where the submatrices A,B,C are owned by proc0, D,E,F are
1469    owned by proc1, G,H,I are owned by proc2.
1470 
1471    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1472    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1473    The 'M','N' parameters are 8,8, and have the same values on all procs.
1474 
1475    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1476    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1477    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1478    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1479    part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL`
   matrix, and [DF] as another `MATSEQSELL` matrix.
1481 
1482    When d_rlenmax, o_rlenmax parameters are specified, d_rlenmax storage elements are
1483    allocated for every row of the local diagonal submatrix, and o_rlenmax
1484    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1485    One way to choose d_rlenmax and o_rlenmax is to use the max nonzerors per local
1486    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1487    In this case, the values of d_rlenmax,o_rlenmax are
1488 .vb
1489      proc0 - d_rlenmax = 2, o_rlenmax = 2
1490      proc1 - d_rlenmax = 3, o_rlenmax = 2
1491      proc2 - d_rlenmax = 1, o_rlenmax = 4
1492 .ve
   We are allocating m*(d_rlenmax+o_rlenmax) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.
1497 
1498    When `d_rlen`, `o_rlen` parameters are specified, the storage is specified
1499    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for `d_rlen`, `o_rlen` are
.vb
     proc0 - d_rlen = [2,2,2] and o_rlen = [2,2,2]
     proc1 - d_rlen = [3,3,2] and o_rlen = [2,1,1]
     proc2 - d_rlen = [1,1]   and o_rlen = [4,4]
.ve
1505 .ve
1506    Here the space allocated is still 37 though there are 34 nonzeros because
1507    the allocation is always done according to rlenmax.
1508 
1509    Level: intermediate
1510 
1511    Notes:
1512    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1513    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1514    [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`]
1515 
1516    If the *_rlen parameter is given then the *_rlenmax parameter is ignored
1517 
1518    `m`, `n`, `M`, `N` parameters specify the size of the matrix, and its partitioning across
1519    processors, while `d_rlenmax`, `d_rlen`, `o_rlenmax` , `o_rlen` parameters specify the approximate
1520    storage requirements for this matrix.
1521 
1522    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
1523    processor than it must be used on all processors that share the object for
1524    that argument.
1525 
1526    The user MUST specify either the local or global matrix dimensions
1527    (possibly both).
1528 
1529    The parallel matrix is partitioned across processors such that the
1530    first m0 rows belong to process 0, the next m1 rows belong to
1531    process 1, the next m2 rows belong to process 2 etc.. where
1532    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
1533    values corresponding to [`m` x `N`] submatrix.
1534 
1535    The columns are logically partitioned with the n0 columns belonging
1536    to 0th partition, the next n1 columns belonging to the next
1537    partition etc.. where n0,n1,n2... are the input parameter `n`.
1538 
1539    The DIAGONAL portion of the local submatrix on any given processor
1540    is the submatrix corresponding to the rows and columns `m`, `n`
1541    corresponding to the given processor. i.e diagonal matrix on
1542    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
1543    etc. The remaining portion of the local submatrix [m x (N-n)]
1544    constitute the OFF-DIAGONAL portion. The example below better
1545    illustrates this concept.
1546 
1547    For a square global matrix we define each processor's diagonal portion
1548    to be its local rows and the corresponding columns (a square submatrix);
1549    each processor's off-diagonal portion encompasses the remainder of the
1550    local matrix (a rectangular submatrix).
1551 
1552    If `o_rlen`, `d_rlen` are specified, then `o_rlenmax`, and `d_rlenmax` are ignored.
1553 
1554    When calling this routine with a single process communicator, a matrix of
1555    type `MATSEQSELL` is returned.  If a matrix of type `MATMPISELL` is desired for this
1556    type of communicator, use the construction mechanism
1557 .vb
1558    MatCreate(...,&A);
1559    MatSetType(A,MATMPISELL);
1560    MatSetSizes(A, m,n,M,N);
1561    MatMPISELLSetPreallocation(A,...);
1562 .ve
1563 
1564 .seealso: `Mat`, `MATSELL`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatMPISELLSetPreallocation()`, `MatMPISELLSetPreallocationSELL()`,
1565           `MATMPISELL`, `MatCreateMPISELLWithArrays()`
1566 @*/
1567 PetscErrorCode MatCreateSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[], Mat *A)
1568 {
1569   PetscMPIInt size;
1570 
1571   PetscFunctionBegin;
1572   PetscCall(MatCreate(comm, A));
1573   PetscCall(MatSetSizes(*A, m, n, M, N));
1574   PetscCallMPI(MPI_Comm_size(comm, &size));
1575   if (size > 1) {
1576     PetscCall(MatSetType(*A, MATMPISELL));
1577     PetscCall(MatMPISELLSetPreallocation(*A, d_rlenmax, d_rlen, o_rlenmax, o_rlen));
1578   } else {
1579     PetscCall(MatSetType(*A, MATSEQSELL));
1580     PetscCall(MatSeqSELLSetPreallocation(*A, d_rlenmax, d_rlen));
1581   }
1582   PetscFunctionReturn(PETSC_SUCCESS);
1583 }
1584 
1585 PetscErrorCode MatMPISELLGetSeqSELL(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
1586 {
1587   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
1588   PetscBool    flg;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &flg));
1592   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPISELL matrix as input");
1593   if (Ad) *Ad = a->A;
1594   if (Ao) *Ao = a->B;
1595   if (colmap) *colmap = a->garray;
1596   PetscFunctionReturn(PETSC_SUCCESS);
1597 }
1598 
1599 /*@C
1600      MatMPISELLGetLocalMatCondensed - Creates a `MATSEQSELL` matrix from an `MATMPISELL` matrix by taking all its local rows and NON-ZERO columns
1601 
1602     Not Collective
1603 
1604    Input Parameters:
1605 +    A - the matrix
1606 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
1607 .    row - index sets of rows to extract (or `NULL`)
1608 -    col - index sets of columns to extract (or `NULL`)
1609 
1610    Output Parameter:
1611 .    A_loc - the local sequential matrix generated
1612 
1613     Level: developer
1614 
1615 .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`, `MatGetOwnershipRange()`, `MatMPISELLGetLocalMat()`
1616 @*/
PetscErrorCode MatMPISELLGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  PetscInt     i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS           isrowa, iscola;
  Mat         *aloc;
  PetscBool    match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPISELL matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  /* default row IS: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  /* default column IS: the locally "nonzero" columns, i.e. the global columns of the
     off-diagonal block (a->garray) merged with the diagonal block's column range,
     in ascending global order.  NOTE(review): this relies on a->garray being sorted
     ascending so a single split point (imark) separates entries below/above the
     diagonal range -- confirm against the SELL assembly code. */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* off-diagonal columns that precede the local diagonal range */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* the diagonal block's own (contiguous) column range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    /* remaining off-diagonal columns, beyond the diagonal range */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* IS takes ownership of idx; do not free it here */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  /* MatCreateSubMatrices() expects an array of Mats when reusing, so wrap *A_loc;
     with MAT_INITIAL_MATRIX it allocates the array itself */
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* only destroy the index sets we created ourselves */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1666 
1667 #include <../src/mat/impls/aij/mpi/mpiaij.h>
1668 
/*
  Convert a MATMPISELL matrix to MATMPIAIJ by converting the local diagonal (A)
  and off-diagonal (B) MATSEQSELL blocks individually.  Supports the
  MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX semantics of
  MatConvert(); the input must already be assembled.
*/
PetscErrorCode MatConvert_MPISELL_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  Mat          B;
  Mat_MPIAIJ  *b;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell with matching layout; the zero preallocations
       are placeholders -- the real A/B blocks are installed below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* sparsity pattern unchanged: convert values in place into B's existing blocks */
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* NOTE(review): MatDisAssemble_MPISELL appears to restore global column
       numbering in A's off-diagonal block before conversion -- confirm; A is
       re-assembled below to undo this */
    PetscCall(MatDisAssemble_MPISELL(A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place: B's guts replace A's, B's header is destroyed */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1712 
/*
  Convert a MATMPIAIJ matrix to MATMPISELL.  Per-row nonzero counts for the
  diagonal/off-diagonal blocks are read from the CSR row pointers of the input,
  then the sequential blocks are converted individually.  Supports the
  MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX semantics of
  MatConvert(); the input must already be assembled.
*/
PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ *)A->data;
  Mat          B;
  Mat_MPISELL *b;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)a->A->data, *Ba = (Mat_SeqAIJ *)a->B->data;
    PetscInt    i, d_nz = 0, o_nz = 0, m = A->rmap->N, n = A->cmap->N, lm = A->rmap->n, ln = A->cmap->n;
    PetscInt   *d_nnz, *o_nnz;
    PetscCall(PetscMalloc2(lm, &d_nnz, lm, &o_nnz));
    /* row lengths from CSR row offsets; d_nz/o_nz track the per-block maxima
       used as the SELL rlenmax values */
    for (i = 0; i < lm; i++) {
      d_nnz[i] = Aa->i[i + 1] - Aa->i[i];
      o_nnz[i] = Ba->i[i + 1] - Ba->i[i];
      if (d_nnz[i] > d_nz) d_nz = d_nnz[i];
      if (o_nnz[i] > o_nz) o_nz = o_nnz[i];
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPISELL));
    PetscCall(MatSetSizes(B, lm, ln, m, n));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    /* both preallocations are set so B works on one process or many */
    PetscCall(MatSeqSELLSetPreallocation(B, d_nz, d_nnz));
    PetscCall(MatMPISELLSetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }
  b = (Mat_MPISELL *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* sparsity pattern unchanged: convert values in place into B's existing blocks */
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  }

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place: B's guts replace A's, B's header is destroyed */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1766 
/*
  SOR relaxation for MATMPISELL.  Only the "local" variants are supported: each
  outer iteration scatters the current solution to the ghost vector, forms
  bb1 = bb - B*x (off-process contribution moved to the right-hand side), and
  delegates the sweep to the diagonal block's SOR routine.  Truly parallel SOR
  is not implemented and errors out.
*/
PetscErrorCode MatSOR_MPISELL(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISELL *mat = (Mat_MPISELL *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure local triangular application; no communication needed */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed whenever the while-loops below run; "~flag & SOR_ZERO_INITIAL_GUESS"
     is bitwise: true when the SOR_ZERO_INITIAL_GUESS bit is NOT set in flag */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the off-process part of x is zero, so the first
       iteration can use bb directly */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  /* VecDestroy handles bb1 == NULL */
  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization error detected in the local sweep */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1836 
1837 #if defined(PETSC_HAVE_CUDA)
1838 PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat, MatType, MatReuse, Mat *);
1839 #endif
1840 
1841 /*MC
   MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for parallel sparse matrices.
1843 
1844    Options Database Keys:
1845 . -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()`
1846 
1847   Level: beginner
1848 
.seealso: `Mat`, `MATSELL`, `MATSEQSELL`, `MatCreateSELL()`
1850 M*/
/*
  Constructor registered for MATMPISELL: allocates the Mat_MPISELL context,
  installs the operations table, creates the stash used for off-process
  MatSetValues() entries, and composes the type-specific methods on the object.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISELL(Mat B)
{
  Mat_MPISELL *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* copy the file-level function table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL; /* ghost-value vector */
  b->Mvctx = NULL; /* scatter filling lvec from the global vector */

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* compose type-specific implementations looked up by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpiaij_C", MatConvert_MPISELL_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpisellcuda_C", MatConvert_MPISELL_MPISELLCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPISELL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISELL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1894