xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
12               matrix that represents the operator. For sequential matrices it returns itself.
13 
14     Input Parameter:
15 .      A - the sequential or MPI dense matrix
16 
17     Output Parameter:
18 .      B - the inner matrix
19 
20     Level: intermediate
21 
22 .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
25   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
26   PetscBool     flg;
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
30   PetscValidPointer(B, 2);
31   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
32   if (flg) *B = mat->A;
33   else {
34     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
35     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
42   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
43   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
44 
45   PetscFunctionBegin;
46   PetscCall(MatCopy(Amat->A, Bmat->A, s));
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
51   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
52   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
53   PetscScalar  *v;
54 
55   PetscFunctionBegin;
56   PetscCall(MatDenseGetArray(mat->A, &v));
57   PetscCall(MatDenseGetLDA(mat->A, &lda));
58   rend2 = PetscMin(rend, A->cmap->N);
59   if (rend2 > rstart) {
60     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
61     PetscCall(PetscLogFlops(rend2 - rstart));
62   }
63   PetscCall(MatDenseRestoreArray(mat->A, &v));
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
68   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
69   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
70 
71   PetscFunctionBegin;
72   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
73   lrow = row - rstart;
74   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
79   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
80   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
81 
82   PetscFunctionBegin;
83   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
84   lrow = row - rstart;
85   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
86   PetscFunctionReturn(0);
87 }
88 
/* Return (and cache on A) a sequential dense matrix aliasing the m x m block of A
   whose global column range matches this process's row range. No data is copied:
   the block shares storage with A's local matrix. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* a "diagonal block" only makes sense when row and column layouts coincide */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  /* reuse a previously created block if one is cached on A */
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* alias the local array starting at column rstart; the m*rstart offset is the
       column-major offset of that column.
       NOTE(review): this assumes the local block's leading dimension equals m — confirm */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composition takes its own reference, so destroying our handle below keeps B alive */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
118 
/* Insert/add values into the parallel dense matrix. Locally owned rows go straight
   into the sequential block; off-process rows are buffered in the stash and
   communicated during MatAssemblyBegin/End. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: insert directly, using the local row number */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is laid out by rows: row i occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is laid out by columns: entry (i,j) is v[i + j*m], so insert one value at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it for communication at assembly time */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
150 
151 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
152   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
153   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
154 
155   PetscFunctionBegin;
156   for (i = 0; i < m; i++) {
157     if (idxm[i] < 0) continue; /* negative row */
158     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
159     if (idxm[i] >= rstart && idxm[i] < rend) {
160       row = idxm[i] - rstart;
161       for (j = 0; j < n; j++) {
162         if (idxn[j] < 0) continue; /* negative column */
163         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
164         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
165       }
166     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
172   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
173 
174   PetscFunctionBegin;
175   PetscCall(MatDenseGetLDA(a->A, lda));
176   PetscFunctionReturn(0);
177 }
178 
179 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
180   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
181   PetscBool     iscuda;
182 
183   PetscFunctionBegin;
184   if (!a->A) {
185     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
186     PetscCall(PetscLayoutSetUp(A->rmap));
187     PetscCall(PetscLayoutSetUp(A->cmap));
188     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
189     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A));
190     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
191     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
192     PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
193   }
194   PetscCall(MatDenseSetLDA(a->A, lda));
195   PetscFunctionReturn(0);
196 }
197 
198 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
199   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
200 
201   PetscFunctionBegin;
202   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
203   PetscCall(MatDenseGetArray(a->A, array));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
208   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
209 
210   PetscFunctionBegin;
211   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
212   PetscCall(MatDenseGetArrayRead(a->A, array));
213   PetscFunctionReturn(0);
214 }
215 
216 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
217   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
218 
219   PetscFunctionBegin;
220   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
221   PetscCall(MatDenseGetArrayWrite(a->A, array));
222   PetscFunctionReturn(0);
223 }
224 
225 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
226   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
227 
228   PetscFunctionBegin;
229   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
230   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
231   PetscCall(MatDensePlaceArray(a->A, array));
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
236   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
237 
238   PetscFunctionBegin;
239   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
240   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
241   PetscCall(MatDenseResetArray(a->A));
242   PetscFunctionReturn(0);
243 }
244 
245 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
246   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
247 
248   PetscFunctionBegin;
249   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
250   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
251   PetscCall(MatDenseReplaceArray(a->A, array));
252   PetscFunctionReturn(0);
253 }
254 
/* Extract the submatrix A(isrow, iscol). The row distribution of the result must
   match that of A (no parallel redistribution); the column index set is gathered
   onto every process since each local block stores all global columns. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every rank needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* source column icol[i] starts at v + lda*icol[i] (column-major); bv walks the
     destination contiguously.
     NOTE(review): assumes the new local block's leading dimension equals nrows, and
     that every irow[] entry is locally owned (irow[j]-rstart is never range-checked) — confirm */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
320 
321 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
322   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
323 
324   PetscFunctionBegin;
325   PetscCall(MatDenseRestoreArray(a->A, array));
326   PetscFunctionReturn(0);
327 }
328 
329 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
330   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
331 
332   PetscFunctionBegin;
333   PetscCall(MatDenseRestoreArrayRead(a->A, array));
334   PetscFunctionReturn(0);
335 }
336 
337 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
338   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
339 
340   PetscFunctionBegin;
341   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
342   PetscFunctionReturn(0);
343 }
344 
345 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
346   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
347   PetscInt      nstash, reallocs;
348 
349   PetscFunctionBegin;
350   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
351 
352   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
353   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
354   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
355   PetscFunctionReturn(0);
356 }
357 
/* Complete assembly: drain the stash messages started in MatAssemblyBegin_MPIDense(),
   insert the received off-process values, then assemble the local block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* finish assembly of the local sequential block */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}
390 
391 PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
392   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
393 
394   PetscFunctionBegin;
395   PetscCall(MatZeroEntries(l->A));
396   PetscFunctionReturn(0);
397 }
398 
/* Zero the given (global) rows, optionally placing diag on the diagonal and fixing
   the right-hand side b so that x remains a solution for the zeroed rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed: b[row] = diag * x[row] for each zeroed local row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the local rows with a zero diagonal ... */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* ... then set the diagonal to diag. NOTE(review): MatDiagonalSet overwrites the
       diagonal of EVERY row, not only the zeroed ones — confirm this is the intended
       semantics for this implementation */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
429 
430 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
431 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
432 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
433 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
434 
435 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
436   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
437   const PetscScalar *ax;
438   PetscScalar       *ay;
439   PetscMemType       axmtype, aymtype;
440 
441   PetscFunctionBegin;
442   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
443   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
444   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
445   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
446   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
447   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
448   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
449   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
450   PetscFunctionReturn(0);
451 }
452 
453 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
454   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
455   const PetscScalar *ax;
456   PetscScalar       *ay;
457   PetscMemType       axmtype, aymtype;
458 
459   PetscFunctionBegin;
460   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
461   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
462   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
463   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
464   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
465   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
466   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
467   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
468   PetscFunctionReturn(0);
469 }
470 
471 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
472   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
473   const PetscScalar *ax;
474   PetscScalar       *ay;
475   PetscMemType       axmtype, aymtype;
476 
477   PetscFunctionBegin;
478   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
479   PetscCall(VecSet(yy, 0.0));
480   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
481   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
483   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
484   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
485   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
486   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
487   PetscFunctionReturn(0);
488 }
489 
490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
491   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
492   const PetscScalar *ax;
493   PetscScalar       *ay;
494   PetscMemType       axmtype, aymtype;
495 
496   PetscFunctionBegin;
497   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
498   PetscCall(VecCopy(yy, zz));
499   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
500   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
501   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
502   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
503   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
504   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
505   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
510   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
511   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
512   PetscScalar       *x, zero = 0.0;
513   const PetscScalar *av;
514 
515   PetscFunctionBegin;
516   PetscCall(VecSet(v, zero));
517   PetscCall(VecGetArray(v, &x));
518   PetscCall(VecGetSize(v, &n));
519   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
520   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
521   radd = A->rmap->rstart * m;
522   PetscCall(MatDenseGetArrayRead(a->A, &av));
523   PetscCall(MatDenseGetLDA(a->A, &lda));
524   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
525   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
526   PetscCall(VecRestoreArray(v, &x));
527   PetscFunctionReturn(0);
528 }
529 
/* Destroy the parallel dense matrix: release the local block and communication
   objects, free the type-private data, and detach every composed method so a
   subsequent type change cannot inherit stale implementations. */
PetscErrorCode MatDestroy_MPIDense(Mat mat) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* outstanding views of the storage must be returned before destruction */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach all composed methods registered by MatCreate_MPIDense and friends */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}
604 
605 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
606 
607 #include <petscdraw.h>
/* View the parallel dense matrix via an ASCII, draw, or socket viewer. For entry
   output the whole matrix is gathered onto rank 0 and viewed there; the ASCII
   INFO formats are handled without gathering. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype)); /* NOTE(review): vtype is fetched but never used */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage statistics instead of entries */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing beyond the standard header is printed for plain INFO */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns everything in the gathered copy; other ranks own nothing */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* each rank re-inserts its own rows; off-process entries are routed to rank 0
       through the stash during assembly */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* the subviewer restricts output to rank 0, which holds the whole matrix */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
689 
690 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
691   PetscBool iascii, isbinary, isdraw, issocket;
692 
693   PetscFunctionBegin;
694   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
695   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
696   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
697   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
698 
699   if (iascii || issocket || isdraw) {
700     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
701   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
702   PetscFunctionReturn(0);
703 }
704 
705 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
706   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
707   Mat            mdn = mat->A;
708   PetscLogDouble isend[5], irecv[5];
709 
710   PetscFunctionBegin;
711   info->block_size = 1.0;
712 
713   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
714 
715   isend[0] = info->nz_used;
716   isend[1] = info->nz_allocated;
717   isend[2] = info->nz_unneeded;
718   isend[3] = info->memory;
719   isend[4] = info->mallocs;
720   if (flag == MAT_LOCAL) {
721     info->nz_used      = isend[0];
722     info->nz_allocated = isend[1];
723     info->nz_unneeded  = isend[2];
724     info->memory       = isend[3];
725     info->mallocs      = isend[4];
726   } else if (flag == MAT_GLOBAL_MAX) {
727     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
728 
729     info->nz_used      = irecv[0];
730     info->nz_allocated = irecv[1];
731     info->nz_unneeded  = irecv[2];
732     info->memory       = irecv[3];
733     info->mallocs      = irecv[4];
734   } else if (flag == MAT_GLOBAL_SUM) {
735     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
736 
737     info->nz_used      = irecv[0];
738     info->nz_allocated = irecv[1];
739     info->nz_unneeded  = irecv[2];
740     info->memory       = irecv[3];
741     info->mallocs      = irecv[4];
742   }
743   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
744   info->fill_ratio_needed = 0;
745   info->factor_mallocs    = 0;
746   PetscFunctionReturn(0);
747 }
748 
749 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
750   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
751 
752   PetscFunctionBegin;
753   switch (op) {
754   case MAT_NEW_NONZERO_LOCATIONS:
755   case MAT_NEW_NONZERO_LOCATION_ERR:
756   case MAT_NEW_NONZERO_ALLOCATION_ERR:
757     MatCheckPreallocated(A, 1);
758     PetscCall(MatSetOption(a->A, op, flg));
759     break;
760   case MAT_ROW_ORIENTED:
761     MatCheckPreallocated(A, 1);
762     a->roworiented = flg;
763     PetscCall(MatSetOption(a->A, op, flg));
764     break;
765   case MAT_FORCE_DIAGONAL_ENTRIES:
766   case MAT_KEEP_NONZERO_PATTERN:
767   case MAT_USE_HASH_TABLE:
768   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
769   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
770   case MAT_SYMMETRIC:
771   case MAT_STRUCTURALLY_SYMMETRIC:
772   case MAT_HERMITIAN:
773   case MAT_SYMMETRY_ETERNAL:
774   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
775   case MAT_SPD:
776   case MAT_IGNORE_LOWER_TRIANGULAR:
777   case MAT_IGNORE_ZERO_ENTRIES:
778   case MAT_SPD_ETERNAL:
779     /* if the diagonal matrix is square it inherits some of the properties above */
780     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
781     break;
782   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
788   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
789   const PetscScalar *l;
790   PetscScalar        x, *v, *vv, *r;
791   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
792 
793   PetscFunctionBegin;
794   PetscCall(MatDenseGetArray(mdn->A, &vv));
795   PetscCall(MatDenseGetLDA(mdn->A, &lda));
796   PetscCall(MatGetLocalSize(A, &s2, &s3));
797   if (ll) {
798     PetscCall(VecGetLocalSize(ll, &s2a));
799     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
800     PetscCall(VecGetArrayRead(ll, &l));
801     for (i = 0; i < m; i++) {
802       x = l[i];
803       v = vv + i;
804       for (j = 0; j < n; j++) {
805         (*v) *= x;
806         v += lda;
807       }
808     }
809     PetscCall(VecRestoreArrayRead(ll, &l));
810     PetscCall(PetscLogFlops(1.0 * n * m));
811   }
812   if (rr) {
813     const PetscScalar *ar;
814 
815     PetscCall(VecGetLocalSize(rr, &s3a));
816     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
817     PetscCall(VecGetArrayRead(rr, &ar));
818     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
819     PetscCall(VecGetArray(mdn->lvec, &r));
820     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
821     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
822     PetscCall(VecRestoreArrayRead(rr, &ar));
823     for (i = 0; i < n; i++) {
824       x = r[i];
825       v = vv + i * lda;
826       for (j = 0; j < m; j++) (*v++) *= x;
827     }
828     PetscCall(VecRestoreArray(mdn->lvec, &r));
829     PetscCall(PetscLogFlops(1.0 * n * m));
830   }
831   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
832   PetscFunctionReturn(0);
833 }
834 
835 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
836   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
837   PetscInt           i, j;
838   PetscMPIInt        size;
839   PetscReal          sum = 0.0;
840   const PetscScalar *av, *v;
841 
842   PetscFunctionBegin;
843   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
844   v = av;
845   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
846   if (size == 1) {
847     PetscCall(MatNorm(mdn->A, type, nrm));
848   } else {
849     if (type == NORM_FROBENIUS) {
850       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
851         sum += PetscRealPart(PetscConj(*v) * (*v));
852         v++;
853       }
854       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
855       *nrm = PetscSqrtReal(*nrm);
856       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
857     } else if (type == NORM_1) {
858       PetscReal *tmp, *tmp2;
859       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
860       *nrm = 0.0;
861       v    = av;
862       for (j = 0; j < mdn->A->cmap->n; j++) {
863         for (i = 0; i < mdn->A->rmap->n; i++) {
864           tmp[j] += PetscAbsScalar(*v);
865           v++;
866         }
867       }
868       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
869       for (j = 0; j < A->cmap->N; j++) {
870         if (tmp2[j] > *nrm) *nrm = tmp2[j];
871       }
872       PetscCall(PetscFree2(tmp, tmp2));
873       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
874     } else if (type == NORM_INFINITY) { /* max row norm */
875       PetscReal ntemp;
876       PetscCall(MatNorm(mdn->A, type, &ntemp));
877       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
878     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
879   }
880   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
881   PetscFunctionReturn(0);
882 }
883 
/* Build the explicit transpose of a parallel dense matrix.

   reuse semantics:
     MAT_INITIAL_MATRIX - create a new matrix returned in *matout
     MAT_REUSE_MATRIX   - fill a previously created, conforming *matout
     MAT_INPLACE_MATRIX - replace A itself (via MatHeaderMerge) */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create B with row/column layouts swapped relative to A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  /* the local block stores all global columns of the owned rows,
     so n here equals the global column count */
  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global indices of owned rows */
  /* column j of A becomes global row j of B; v advances one column (lda) per step */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: move B's guts into A's header and destroy the shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
921 
922 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
923 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
924 
925 PetscErrorCode MatSetUp_MPIDense(Mat A) {
926   PetscFunctionBegin;
927   PetscCall(PetscLayoutSetUp(A->rmap));
928   PetscCall(PetscLayoutSetUp(A->cmap));
929   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
934   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
935 
936   PetscFunctionBegin;
937   PetscCall(MatAXPY(A->A, alpha, B->A, str));
938   PetscFunctionReturn(0);
939 }
940 
941 PetscErrorCode MatConjugate_MPIDense(Mat mat) {
942   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
943 
944   PetscFunctionBegin;
945   PetscCall(MatConjugate(a->A));
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatRealPart_MPIDense(Mat A) {
950   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
951 
952   PetscFunctionBegin;
953   PetscCall(MatRealPart(a->A));
954   PetscFunctionReturn(0);
955 }
956 
957 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
958   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959 
960   PetscFunctionBegin;
961   PetscCall(MatImaginaryPart(a->A));
962   PetscFunctionReturn(0);
963 }
964 
965 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
966   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
967 
968   PetscFunctionBegin;
969   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
970   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
971   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
972   PetscFunctionReturn(0);
973 }
974 
975 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
976 
977 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
978   PetscInt      i, m, n;
979   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
980   PetscReal    *work;
981 
982   PetscFunctionBegin;
983   PetscCall(MatGetSize(A, &m, &n));
984   PetscCall(PetscMalloc1(n, &work));
985   if (type == REDUCTION_MEAN_REALPART) {
986     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
987   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
988     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
989   } else {
990     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
991   }
992   if (type == NORM_2) {
993     for (i = 0; i < n; i++) work[i] *= work[i];
994   }
995   if (type == NORM_INFINITY) {
996     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
997   } else {
998     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
999   }
1000   PetscCall(PetscFree(work));
1001   if (type == NORM_2) {
1002     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1003   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1004     for (i = 0; i < n; i++) reductions[i] /= m;
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #if defined(PETSC_HAVE_CUDA)
1010 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
1011   PetscScalar *da;
1012   PetscInt     lda;
1013 
1014   PetscFunctionBegin;
1015   PetscCall(MatDenseCUDAGetArray(A, &da));
1016   PetscCall(MatDenseGetLDA(A, &lda));
1017   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1018   PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1019   PetscCall(MatDenseCUDARestoreArray(A, &da));
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1024   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1025   PetscInt      lda;
1026 
1027   PetscFunctionBegin;
1028   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1029   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1030   if (!a->cvec) {
1031     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1032     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1033   }
1034   a->vecinuse = col + 1;
1035   PetscCall(MatDenseGetLDA(a->A, &lda));
1036   PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1037   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1038   *v = a->cvec;
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1043   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1044 
1045   PetscFunctionBegin;
1046   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1047   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1048   a->vecinuse = 0;
1049   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1050   PetscCall(VecCUDAResetArray(a->cvec));
1051   if (v) *v = NULL;
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1056   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1057   PetscInt      lda;
1058 
1059   PetscFunctionBegin;
1060   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1061   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1062   if (!a->cvec) {
1063     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1064     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1065   }
1066   a->vecinuse = col + 1;
1067   PetscCall(MatDenseGetLDA(a->A, &lda));
1068   PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
1069   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1070   PetscCall(VecLockReadPush(a->cvec));
1071   *v = a->cvec;
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1076   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1077 
1078   PetscFunctionBegin;
1079   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1080   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1081   a->vecinuse = 0;
1082   PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
1083   PetscCall(VecLockReadPop(a->cvec));
1084   PetscCall(VecCUDAResetArray(a->cvec));
1085   if (v) *v = NULL;
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1090   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1091   PetscInt      lda;
1092 
1093   PetscFunctionBegin;
1094   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1095   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1096   if (!a->cvec) {
1097     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1098     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1099   }
1100   a->vecinuse = col + 1;
1101   PetscCall(MatDenseGetLDA(a->A, &lda));
1102   PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1103   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1104   *v = a->cvec;
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1109   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1110 
1111   PetscFunctionBegin;
1112   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1113   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1114   a->vecinuse = 0;
1115   PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1116   PetscCall(VecCUDAResetArray(a->cvec));
1117   if (v) *v = NULL;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1122   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1123 
1124   PetscFunctionBegin;
1125   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1126   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1127   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
1132   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1133 
1134   PetscFunctionBegin;
1135   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1136   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1137   PetscCall(MatDenseCUDAResetArray(l->A));
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1142   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1143 
1144   PetscFunctionBegin;
1145   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1146   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1147   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1152   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1153 
1154   PetscFunctionBegin;
1155   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1160   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1161 
1162   PetscFunctionBegin;
1163   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1168   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1176   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1184   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(MatDenseCUDAGetArray(l->A, a));
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1192   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1200 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1201 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1202 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1203 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1204 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1205 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1206 
/* Toggle host/device execution: forward the binding to the local block and
   swap the composed column-vec functions (and the shift op) between the
   CUDA and host implementations. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached wrapper objects of the wrong (host) type so they are
       recreated as CUDA objects on next use */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    /* rebind the host implementations */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  /* propagate the binding to the cached submatrix wrapper, if any */
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}
1241 
/* Preallocate device storage for a MATMPIDENSECUDA matrix.

   d_data - optional device array to use as storage (NULL to allocate);
            when provided it is assumed to be at least
            local-rows x global-columns in column-major layout
            (NOTE(review): ownership/lifetime follows the seq CUDA
            preallocation routine - confirm against its documentation).

   No-op for matrices that are not of type MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  /* layouts must be final before sizing the local block */
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* local block holds all global columns of the owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1263 #endif
1264 
1265 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
1266   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1267 
1268   PetscFunctionBegin;
1269   PetscCall(MatSetRandom(d->A, rctx));
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
1274   PetscFunctionBegin;
1275   *missing = PETSC_FALSE;
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1280 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1281 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1282 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1283 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1284 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1285 
1286 /* -------------------------------------------------------------------*/
1287 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1288                                        MatGetRow_MPIDense,
1289                                        MatRestoreRow_MPIDense,
1290                                        MatMult_MPIDense,
1291                                        /*  4*/ MatMultAdd_MPIDense,
1292                                        MatMultTranspose_MPIDense,
1293                                        MatMultTransposeAdd_MPIDense,
1294                                        NULL,
1295                                        NULL,
1296                                        NULL,
1297                                        /* 10*/ NULL,
1298                                        NULL,
1299                                        NULL,
1300                                        NULL,
1301                                        MatTranspose_MPIDense,
1302                                        /* 15*/ MatGetInfo_MPIDense,
1303                                        MatEqual_MPIDense,
1304                                        MatGetDiagonal_MPIDense,
1305                                        MatDiagonalScale_MPIDense,
1306                                        MatNorm_MPIDense,
1307                                        /* 20*/ MatAssemblyBegin_MPIDense,
1308                                        MatAssemblyEnd_MPIDense,
1309                                        MatSetOption_MPIDense,
1310                                        MatZeroEntries_MPIDense,
1311                                        /* 24*/ MatZeroRows_MPIDense,
1312                                        NULL,
1313                                        NULL,
1314                                        NULL,
1315                                        NULL,
1316                                        /* 29*/ MatSetUp_MPIDense,
1317                                        NULL,
1318                                        NULL,
1319                                        MatGetDiagonalBlock_MPIDense,
1320                                        NULL,
1321                                        /* 34*/ MatDuplicate_MPIDense,
1322                                        NULL,
1323                                        NULL,
1324                                        NULL,
1325                                        NULL,
1326                                        /* 39*/ MatAXPY_MPIDense,
1327                                        MatCreateSubMatrices_MPIDense,
1328                                        NULL,
1329                                        MatGetValues_MPIDense,
1330                                        MatCopy_MPIDense,
1331                                        /* 44*/ NULL,
1332                                        MatScale_MPIDense,
1333                                        MatShift_MPIDense,
1334                                        NULL,
1335                                        NULL,
1336                                        /* 49*/ MatSetRandom_MPIDense,
1337                                        NULL,
1338                                        NULL,
1339                                        NULL,
1340                                        NULL,
1341                                        /* 54*/ NULL,
1342                                        NULL,
1343                                        NULL,
1344                                        NULL,
1345                                        NULL,
1346                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1347                                        MatDestroy_MPIDense,
1348                                        MatView_MPIDense,
1349                                        NULL,
1350                                        NULL,
1351                                        /* 64*/ NULL,
1352                                        NULL,
1353                                        NULL,
1354                                        NULL,
1355                                        NULL,
1356                                        /* 69*/ NULL,
1357                                        NULL,
1358                                        NULL,
1359                                        NULL,
1360                                        NULL,
1361                                        /* 74*/ NULL,
1362                                        NULL,
1363                                        NULL,
1364                                        NULL,
1365                                        NULL,
1366                                        /* 79*/ NULL,
1367                                        NULL,
1368                                        NULL,
1369                                        NULL,
1370                                        /* 83*/ MatLoad_MPIDense,
1371                                        NULL,
1372                                        NULL,
1373                                        NULL,
1374                                        NULL,
1375                                        NULL,
1376                                        /* 89*/ NULL,
1377                                        NULL,
1378                                        NULL,
1379                                        NULL,
1380                                        NULL,
1381                                        /* 94*/ NULL,
1382                                        NULL,
1383                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1384                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1385                                        NULL,
1386                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1387                                        NULL,
1388                                        NULL,
1389                                        MatConjugate_MPIDense,
1390                                        NULL,
1391                                        /*104*/ NULL,
1392                                        MatRealPart_MPIDense,
1393                                        MatImaginaryPart_MPIDense,
1394                                        NULL,
1395                                        NULL,
1396                                        /*109*/ NULL,
1397                                        NULL,
1398                                        NULL,
1399                                        MatGetColumnVector_MPIDense,
1400                                        MatMissingDiagonal_MPIDense,
1401                                        /*114*/ NULL,
1402                                        NULL,
1403                                        NULL,
1404                                        NULL,
1405                                        NULL,
1406                                        /*119*/ NULL,
1407                                        NULL,
1408                                        NULL,
1409                                        NULL,
1410                                        NULL,
1411                                        /*124*/ NULL,
1412                                        MatGetColumnReductions_MPIDense,
1413                                        NULL,
1414                                        NULL,
1415                                        NULL,
1416                                        /*129*/ NULL,
1417                                        NULL,
1418                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1419                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1420                                        NULL,
1421                                        /*134*/ NULL,
1422                                        NULL,
1423                                        NULL,
1424                                        NULL,
1425                                        NULL,
1426                                        /*139*/ NULL,
1427                                        NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        NULL,
1431                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1432                                        /*145*/ NULL,
1433                                        NULL,
1434                                        NULL,
1435                                        NULL,
1436                                        NULL,
1437                                        /*150*/ NULL};
1438 
1439 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
1440   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1441   PetscBool     iscuda;
1442 
1443   PetscFunctionBegin;
1444   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1445   PetscCall(PetscLayoutSetUp(mat->rmap));
1446   PetscCall(PetscLayoutSetUp(mat->cmap));
1447   if (!a->A) {
1448     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1449     PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
1450     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1451   }
1452   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1453   PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1454   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1455   mat->preallocated = PETSC_TRUE;
1456   mat->assembled    = PETSC_TRUE;
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1461   Mat B, C;
1462 
1463   PetscFunctionBegin;
1464   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1465   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1466   PetscCall(MatDestroy(&C));
1467   if (reuse == MAT_REUSE_MATRIX) {
1468     C = *newmat;
1469   } else C = NULL;
1470   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1471   PetscCall(MatDestroy(&B));
1472   if (reuse == MAT_INPLACE_MATRIX) {
1473     PetscCall(MatHeaderReplace(A, &C));
1474   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1479   Mat B, C;
1480 
1481   PetscFunctionBegin;
1482   PetscCall(MatDenseGetLocalMatrix(A, &C));
1483   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1484   if (reuse == MAT_REUSE_MATRIX) {
1485     C = *newmat;
1486   } else C = NULL;
1487   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1488   PetscCall(MatDestroy(&B));
1489   if (reuse == MAT_INPLACE_MATRIX) {
1490     PetscCall(MatHeaderReplace(A, &C));
1491   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #if defined(PETSC_HAVE_ELEMENTAL)
1496 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1497   Mat          mat_elemental;
1498   PetscScalar *v;
1499   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1500 
1501   PetscFunctionBegin;
1502   if (reuse == MAT_REUSE_MATRIX) {
1503     mat_elemental = *newmat;
1504     PetscCall(MatZeroEntries(*newmat));
1505   } else {
1506     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1507     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1508     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1509     PetscCall(MatSetUp(mat_elemental));
1510     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1511   }
1512 
1513   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1514   for (i = 0; i < N; i++) cols[i] = i;
1515   for (i = 0; i < m; i++) rows[i] = rstart + i;
1516 
1517   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1518   PetscCall(MatDenseGetArray(A, &v));
1519   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1520   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1521   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1522   PetscCall(MatDenseRestoreArray(A, &v));
1523   PetscCall(PetscFree2(rows, cols));
1524 
1525   if (reuse == MAT_INPLACE_MATRIX) {
1526     PetscCall(MatHeaderReplace(A, &mat_elemental));
1527   } else {
1528     *newmat = mat_elemental;
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 #endif
1533 
1534 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
1535   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1536 
1537   PetscFunctionBegin;
1538   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
1543   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1544 
1545   PetscFunctionBegin;
1546   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
1551   Mat_MPIDense *mat;
1552   PetscInt      m, nloc, N;
1553 
1554   PetscFunctionBegin;
1555   PetscCall(MatGetSize(inmat, &m, &N));
1556   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1557   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1558     PetscInt sum;
1559 
1560     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1561     /* Check sum(n) = N */
1562     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1563     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1564 
1565     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1566     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1567   }
1568 
1569   /* numeric phase */
1570   mat = (Mat_MPIDense *)(*outmat)->data;
1571   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix to a host MATMPIDENSE matrix.
   Handles MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX
   (for in-place conversion, presumably *newmat == M — the usual PETSc
   MatConvert contract; neither duplicate/copy branch runs then). */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  /* Switch the active implementations to the CPU ones */
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
  /* Vectors created for this matrix should now default to host vectors */
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  /* Remove the CUDA-only methods composed by MatConvert_MPIDense_MPIDenseCUDA() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
  m = (Mat_MPIDense *)(B)->data;
  /* Convert the inner sequential matrix to the host type as well */
  if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU; /* valid data now resides on the CPU */
  PetscFunctionReturn(0);
}
1612 
/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA.
   Handles MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX
   (for in-place conversion, presumably *newmat == M — the usual PETSc
   MatConvert contract; neither duplicate/copy branch runs then). */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  /* Vectors created for this matrix should now default to CUDA vectors */
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
  /* Compose the CUDA-specific methods; these are removed again by
     MatConvert_MPIDenseCUDA_MPIDense() on the reverse conversion */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    /* Convert the inner sequential matrix to CUDA; data is then valid on both sides */
    PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  /* Unbind from the CPU so the device implementations become active */
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1654 #endif
1655 
1656 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1657   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1658   PetscInt      lda;
1659 
1660   PetscFunctionBegin;
1661   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1662   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1663   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1664   a->vecinuse = col + 1;
1665   PetscCall(MatDenseGetLDA(a->A, &lda));
1666   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1667   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1668   *v = a->cvec;
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1673   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1674 
1675   PetscFunctionBegin;
1676   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1677   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1678   a->vecinuse = 0;
1679   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1680   PetscCall(VecResetArray(a->cvec));
1681   if (v) *v = NULL;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1686   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1687   PetscInt      lda;
1688 
1689   PetscFunctionBegin;
1690   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1691   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1692   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1693   a->vecinuse = col + 1;
1694   PetscCall(MatDenseGetLDA(a->A, &lda));
1695   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1696   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1697   PetscCall(VecLockReadPush(a->cvec));
1698   *v = a->cvec;
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1703   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1704 
1705   PetscFunctionBegin;
1706   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1707   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1708   a->vecinuse = 0;
1709   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1710   PetscCall(VecLockReadPop(a->cvec));
1711   PetscCall(VecResetArray(a->cvec));
1712   if (v) *v = NULL;
1713   PetscFunctionReturn(0);
1714 }
1715 
1716 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1717   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1718   PetscInt      lda;
1719 
1720   PetscFunctionBegin;
1721   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1722   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1723   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1724   a->vecinuse = col + 1;
1725   PetscCall(MatDenseGetLDA(a->A, &lda));
1726   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1727   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1728   *v = a->cvec;
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1733   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1734 
1735   PetscFunctionBegin;
1736   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1737   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1738   a->vecinuse = 0;
1739   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1740   PetscCall(VecResetArray(a->cvec));
1741   if (v) *v = NULL;
1742   PetscFunctionReturn(0);
1743 }
1744 
/* Return a parallel dense view of rows [rbegin,rend) and columns [cbegin,cend)
   of A, backed by A's own storage (no copy). Only one such view may be
   outstanding at a time; it must be released with MatDenseRestoreSubMatrix(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Clamp the requested global row range to this process's owned rows,
     yielding local indices [pbegin,pend) into the inner matrix */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* First use: create the cached view matrix and set up its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* When all rows are selected, share A's row layout instead of building one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* Reuse the cached view; rebuild its layouts only when the requested
       sizes differ from the cached ones (checked collectively, since local
       row counts may agree on some ranks and not others) */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* c->A non-NULL means the previous view was never restored */
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse           = cbegin + 1; /* 0 means "not in use", hence the +1 */
  *v                    = a->cmat;
  PetscFunctionReturn(0);
}
1798 
1799 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
1800   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1801   Mat_MPIDense *c;
1802 
1803   PetscFunctionBegin;
1804   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1805   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1806   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1807   a->matinuse = 0;
1808   c           = (Mat_MPIDense *)a->cmat->data;
1809   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1810   if (v) *v = NULL;
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 /*MC
1815    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1816 
1817    Options Database Keys:
1818 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1819 
1820   Level: beginner
1821 
1822 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1823 M*/
/* Constructor for MATMPIDENSE: allocates the implementation struct, installs
   the function table, and composes the type-specific methods queried by the
   generic Mat interface (via PetscTryMethod/PetscUseMethod). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat, &a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific array/column/submatrix accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix formats */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  /* preallocation and matrix-product dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}
1887 
1888 /*MC
1889    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1890 
1891    Options Database Keys:
1892 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
1893 
1894   Level: beginner
1895 
1896 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1897 M*/
1898 #if defined(PETSC_HAVE_CUDA)
1899 #include <petsc/private/deviceimpl.h>
/* Constructor for MATMPIDENSECUDA: builds a plain MPIDENSE matrix and then
   converts it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
  PetscFunctionBegin;
  /* Ensure the CUDA device is initialized before any device work happens */
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
1907 #endif
1908 
1909 /*MC
1910    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1911 
1912    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1913    and `MATMPIDENSE` otherwise.
1914 
1915    Options Database Keys:
1916 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1917 
1918   Level: beginner
1919 
1920 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1921 M*/
1922 
1923 /*MC
1924    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1925 
1926    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
1927    and `MATMPIDENSECUDA` otherwise.
1928 
1929    Options Database Keys:
1930 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
1931 
1932   Level: beginner
1933 
1934 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1935 M*/
1936 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation if the type provides one */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}
1965 
/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* Placing a new array changes the matrix contents, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* NOTE(review): presumably the placed array is a host array, hence the CPU
     offload mask — verify against the CUDA implementations of this method */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1996 
1997 /*@
1998    MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1999 
2000    Not Collective
2001 
2002    Input Parameters:
2003 .  mat - the matrix
2004 
2005    Note:
2006    You can only call this after a call to `MatDensePlaceArray()`
2007 
2008    Level: developer
2009 
2010 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2011 @*/
2012 PetscErrorCode MatDenseResetArray(Mat mat) {
2013   PetscFunctionBegin;
2014   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2015   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2016   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 /*@
2021    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2022    array provided by the user. This is useful to avoid copying an array
2023    into a matrix
2024 
2025    Not Collective
2026 
2027    Input Parameters:
2028 +  mat - the matrix
2029 -  array - the array in column major order
2030 
2031    Note:
2032    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2033    freed by the user. It will be freed when the matrix is destroyed.
2034 
2035    Level: developer
2036 
2037 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2038 @*/
2039 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
2040   PetscFunctionBegin;
2041   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2042   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2043   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2044 #if defined(PETSC_HAVE_CUDA)
2045   mat->offloadmask = PETSC_OFFLOAD_CPU;
2046 #endif
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* the matrix values (potentially) changed, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed device array is now authoritative; the host copy is stale */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2078 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it had before the call to `MatDenseCUDAPlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDenseCUDAPlaceArray()`

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  /* the matrix values revert to the original array, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2101 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* the matrix values (potentially) changed, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement device array is now authoritative; the host copy is stale */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2130 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* the caller is expected to write into the array, so bump the object state now */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2158 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the device copy was just written; mark it as the up-to-date one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2180 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  /* read-only access: no PetscObjectStateIncrease() */
  PetscFunctionReturn(0);
}
2205 
2206 /*@C
2207    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2208 
2209    Not Collective
2210 
2211    Input Parameters:
2212 +  A - the matrix
2213 -  array - the GPU array in column major order
2214 
2215    Note:
2216    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2217 
2218    Level: developer
2219 
2220 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2221 @*/
2222 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) {
2223   PetscFunctionBegin;
2224   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2225   PetscFunctionReturn(0);
2226 }
2227 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* the caller may write through the array, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2253 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the device copy may have been written; mark it as the up-to-date one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2275 #endif
2276 
2277 /*@C
2278    MatCreateDense - Creates a matrix in `MATDENSE` format.
2279 
2280    Collective
2281 
2282    Input Parameters:
2283 +  comm - MPI communicator
2284 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2285 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2286 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2287 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2288 -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2289    to control all matrix memory allocation.
2290 
2291    Output Parameter:
2292 .  A - the matrix
2293 
2294    Notes:
2295    The dense format is fully compatible with standard Fortran 77
2296    storage by columns.
2297 
2298    Although local portions of the matrix are stored in column-major
2299    order, the matrix is partitioned across MPI ranks by row.
2300 
2301    The data input variable is intended primarily for Fortran programmers
2302    who wish to allocate their own matrix memory space.  Most users should
2303    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
2304 
2305    The user MUST specify either the local or global matrix dimensions
2306    (possibly both).
2307 
2308    Level: intermediate
2309 
2310 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2311 @*/
2312 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
2313   PetscFunctionBegin;
2314   PetscCall(MatCreate(comm, A));
2315   PetscCall(MatSetSizes(*A, m, n, M, N));
2316   PetscCall(MatSetType(*A, MATDENSE));
2317   PetscCall(MatSeqDenseSetPreallocation(*A, data));
2318   PetscCall(MatMPIDenseSetPreallocation(*A, data));
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* debug-build consistency check: all ranks must agree on whether a user array is supplied */
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  /* one of the two preallocations is a no-op, depending on the resolved type (seq vs mpi) */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
2353 }
2354 #endif
2355 
2356 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) {
2357   Mat           mat;
2358   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
2359 
2360   PetscFunctionBegin;
2361   *newmat = NULL;
2362   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
2363   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2364   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
2365   a = (Mat_MPIDense *)mat->data;
2366 
2367   mat->factortype   = A->factortype;
2368   mat->assembled    = PETSC_TRUE;
2369   mat->preallocated = PETSC_TRUE;
2370 
2371   mat->insertmode = NOT_SET_VALUES;
2372   a->donotstash   = oldmat->donotstash;
2373 
2374   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2375   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2376 
2377   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2378   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
2379 
2380   *newmat = mat;
2381   PetscFunctionReturn(0);
2382 }
2383 
/* Load a MATMPIDENSE matrix from a viewer; binary and (when configured) HDF5
   viewers are supported, any other viewer type is an error. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) {
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2408 
2409 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
2410   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2411   Mat           a, b;
2412 
2413   PetscFunctionBegin;
2414   a = matA->A;
2415   b = matB->A;
2416   PetscCall(MatEqual(a, b, flag));
2417   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
2422   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2423 
2424   PetscFunctionBegin;
2425   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2426   PetscCall(MatDestroy(&atb->atb));
2427   PetscCall(PetscFree(atb));
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
2432   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2433 
2434   PetscFunctionBegin;
2435   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2436   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2437   PetscCall(PetscFree(abt));
2438   PetscFunctionReturn(0);
2439 }
2440 
/* C = A^T * B (numeric phase): each rank forms its full-size local contribution
   aseq^T * bseq, packs it by destination rank, and MPI_Reduce_scatter sums the
   contributions while distributing the rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, pack the rows of C
     it owns, column by column (recvcounts set up accordingly in the symbolic phase) */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2484 
/* C = A^T * B (symbolic phase): set up C's sizes/type and allocate the
   send buffer and per-rank receive counts reused by every numeric call. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must have identical row ownership for aseq^T * bseq to be meaningful */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C: sendbuf holds a full copy of C per rank,
     recvcounts[i] is the number of entries of C owned by rank i */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2520 
/* C = A * B^T (symbolic phase): set up C and allocate the communication
   buffers for the algorithm selected via the product options ("allgatherv"
   gathers all of B on every rank; "cyclic" pipelines B around the ring). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else selects the cyclic algorithm */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic point-to-point messages cannot collide with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2571 
/* C = A * B^T, cyclic algorithm: the local rows of B circulate around the ring
   of ranks with nonblocking sends/receives; at each of the `size` steps every
   rank multiplies its local A block against the B block currently held,
   filling the corresponding column strip of C. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* when the local block of B is contiguous (lda == local rows) send it in place,
     otherwise compact it into buf[0] first */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the columns of C owned (as rows of B) by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2653 
2654 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) {
2655   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2656   Mat_MatTransMultDense *abt;
2657   MPI_Comm               comm;
2658   PetscMPIInt            size;
2659   PetscScalar           *cv, *sendbuf, *recvbuf;
2660   const PetscScalar     *av, *bv;
2661   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2662   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2663   PetscBLASInt           cm, cn, ck, alda, clda;
2664 
2665   PetscFunctionBegin;
2666   MatCheckProduct(C, 3);
2667   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2668   abt = (Mat_MatTransMultDense *)C->product->data;
2669   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2670   PetscCallMPI(MPI_Comm_size(comm, &size));
2671   PetscCall(MatDenseGetArrayRead(a->A, &av));
2672   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2673   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2674   PetscCall(MatDenseGetLDA(a->A, &i));
2675   PetscCall(PetscBLASIntCast(i, &alda));
2676   PetscCall(MatDenseGetLDA(b->A, &blda));
2677   PetscCall(MatDenseGetLDA(c->A, &i));
2678   PetscCall(PetscBLASIntCast(i, &clda));
2679   /* copy transpose of B into buf[0] */
2680   bn      = B->rmap->n;
2681   sendbuf = abt->buf[0];
2682   recvbuf = abt->buf[1];
2683   for (k = 0, j = 0; j < bn; j++) {
2684     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2685   }
2686   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2687   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2688   PetscCall(PetscBLASIntCast(cK, &ck));
2689   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2690   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2691   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2692   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2693   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2694   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2695   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2696   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2697   PetscFunctionReturn(0);
2698 }
2699 
2700 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2701   Mat_MatTransMultDense *abt;
2702 
2703   PetscFunctionBegin;
2704   MatCheckProduct(C, 3);
2705   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2706   abt = (Mat_MatTransMultDense *)C->product->data;
2707   switch (abt->alg) {
2708   case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break;
2709   default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break;
2710   }
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) {
2715   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatDestroy(&ab->Ce));
2719   PetscCall(MatDestroy(&ab->Ae));
2720   PetscCall(MatDestroy(&ab->Be));
2721   PetscCall(PetscFree(ab));
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 #if defined(PETSC_HAVE_ELEMENTAL)
2726 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2727   Mat_MatMultDense *ab;
2728 
2729   PetscFunctionBegin;
2730   MatCheckProduct(C, 3);
2731   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2732   ab = (Mat_MatMultDense *)C->product->data;
2733   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2734   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2735   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2736   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2741   Mat               Ae, Be, Ce;
2742   Mat_MatMultDense *ab;
2743 
2744   PetscFunctionBegin;
2745   MatCheckProduct(C, 4);
2746   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2747   /* check local size of A and B */
2748   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2749     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2750   }
2751 
2752   /* create elemental matrices Ae and Be */
2753   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2754   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2755   PetscCall(MatSetType(Ae, MATELEMENTAL));
2756   PetscCall(MatSetUp(Ae));
2757   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2758 
2759   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2760   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2761   PetscCall(MatSetType(Be, MATELEMENTAL));
2762   PetscCall(MatSetUp(Be));
2763   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2764 
2765   /* compute symbolic Ce = Ae*Be */
2766   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2767   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2768 
2769   /* setup C */
2770   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2771   PetscCall(MatSetType(C, MATDENSE));
2772   PetscCall(MatSetUp(C));
2773 
2774   /* create data structure for reuse Cdense */
2775   PetscCall(PetscNew(&ab));
2776   ab->Ae = Ae;
2777   ab->Be = Be;
2778   ab->Ce = Ce;
2779 
2780   C->product->data       = ab;
2781   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2782   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2783   PetscFunctionReturn(0);
2784 }
2785 #endif
2786 /* ----------------------------------------------- */
2787 #if defined(PETSC_HAVE_ELEMENTAL)
2788 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
2789   PetscFunctionBegin;
2790   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2791   C->ops->productsymbolic = MatProductSymbolic_AB;
2792   PetscFunctionReturn(0);
2793 }
2794 #endif
2795 
2796 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
2797   Mat_Product *product = C->product;
2798   Mat          A = product->A, B = product->B;
2799 
2800   PetscFunctionBegin;
2801   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2802     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2803   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2804   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
2809   Mat_Product *product     = C->product;
2810   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2811   PetscInt     alg, nalg = 2;
2812   PetscBool    flg = PETSC_FALSE;
2813 
2814   PetscFunctionBegin;
2815   /* Set default algorithm */
2816   alg = 0; /* default is allgatherv */
2817   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2818   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2819 
2820   /* Get runtime option */
2821   if (product->api_user) {
2822     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2823     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2824     PetscOptionsEnd();
2825   } else {
2826     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2827     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2828     PetscOptionsEnd();
2829   }
2830   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2831 
2832   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2833   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
2838   Mat_Product *product = C->product;
2839 
2840   PetscFunctionBegin;
2841   switch (product->type) {
2842 #if defined(PETSC_HAVE_ELEMENTAL)
2843   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
2844 #endif
2845   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
2846   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
2847   default: break;
2848   }
2849   PetscFunctionReturn(0);
2850 }
2851