xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 
/*
   Basic operations for parallel (MPI) dense matrices.
*/
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
12               matrix that represents the operator. For sequential matrices it returns itself.
13 
14     Input Parameter:
15 .      A - the sequential or MPI dense matrix
16 
17     Output Parameter:
18 .      B - the inner matrix
19 
20     Level: intermediate
21 
22 .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
25   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
26   PetscBool     flg;
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
30   PetscValidPointer(B, 2);
31   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
32   if (flg) *B = mat->A;
33   else {
34     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
35     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
42   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
43   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
44 
45   PetscFunctionBegin;
46   PetscCall(MatCopy(Amat->A, Bmat->A, s));
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
51   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
52   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
53   PetscScalar  *v;
54 
55   PetscFunctionBegin;
56   PetscCall(MatDenseGetArray(mat->A, &v));
57   PetscCall(MatDenseGetLDA(mat->A, &lda));
58   rend2 = PetscMin(rend, A->cmap->N);
59   if (rend2 > rstart) {
60     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
61     PetscCall(PetscLogFlops(rend2 - rstart));
62   }
63   PetscCall(MatDenseRestoreArray(mat->A, &v));
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
68   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
69   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
70 
71   PetscFunctionBegin;
72   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
73   lrow = row - rstart;
74   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
79   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
80   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
81 
82   PetscFunctionBegin;
83   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
84   lrow = row - rstart;
85   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
86   PetscFunctionReturn(0);
87 }
88 
/* Return the square diagonal block (local rows x same global columns) of A.
   The block is created lazily on first call, shares storage with the local
   dense matrix, and is cached on A under the key "DiagonalBlock" so repeated
   calls return the same object.  The caller does not own the returned Mat. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* the diagonal block only makes sense when row and column layouts agree */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the local array starting at column rstart; no copy is made.
       NOTE(review): the offset m*rstart assumes the local leading dimension
       equals m (contiguous columns) -- confirm for matrices with lda != m. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* compose takes a reference; the MatDestroy below only drops ours,
       so B stays alive as long as A holds it */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
118 
/* Insert or add a logically dense block of values.  Locally owned rows go
   straight into the sequential block; off-process entries are queued in the
   matrix stash (unless donotstash is set) and communicated at assembly time.
   v is interpreted as row-major when roworiented, column-major otherwise. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) { /* locally owned row */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major: row i of v is contiguous */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-major: entry (i,j) of v sits at v[i + j*m]; insert one at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
150 
151 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
152   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
153   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
154 
155   PetscFunctionBegin;
156   for (i = 0; i < m; i++) {
157     if (idxm[i] < 0) continue; /* negative row */
158     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
159     if (idxm[i] >= rstart && idxm[i] < rend) {
160       row = idxm[i] - rstart;
161       for (j = 0; j < n; j++) {
162         if (idxn[j] < 0) continue; /* negative column */
163         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
164         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
165       }
166     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
172   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
173 
174   PetscFunctionBegin;
175   PetscCall(MatDenseGetLDA(a->A, lda));
176   PetscFunctionReturn(0);
177 }
178 
179 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
180   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
181   PetscBool     iscuda;
182 
183   PetscFunctionBegin;
184   if (!a->A) {
185     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
186     PetscCall(PetscLayoutSetUp(A->rmap));
187     PetscCall(PetscLayoutSetUp(A->cmap));
188     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
189     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
190     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
191     PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
192   }
193   PetscCall(MatDenseSetLDA(a->A, lda));
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
198   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
199 
200   PetscFunctionBegin;
201   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202   PetscCall(MatDenseGetArray(a->A, array));
203   PetscFunctionReturn(0);
204 }
205 
206 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
207   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
208 
209   PetscFunctionBegin;
210   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
211   PetscCall(MatDenseGetArrayRead(a->A, array));
212   PetscFunctionReturn(0);
213 }
214 
215 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
216   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
217 
218   PetscFunctionBegin;
219   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
220   PetscCall(MatDenseGetArrayWrite(a->A, array));
221   PetscFunctionReturn(0);
222 }
223 
224 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
225   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
226 
227   PetscFunctionBegin;
228   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
229   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
230   PetscCall(MatDensePlaceArray(a->A, array));
231   PetscFunctionReturn(0);
232 }
233 
234 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
235   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
236 
237   PetscFunctionBegin;
238   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
239   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
240   PetscCall(MatDenseResetArray(a->A));
241   PetscFunctionReturn(0);
242 }
243 
244 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
245   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
246 
247   PetscFunctionBegin;
248   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
249   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
250   PetscCall(MatDenseReplaceArray(a->A, array));
251   PetscFunctionReturn(0);
252 }
253 
/* Extract the submatrix A(isrow, iscol).  iscol is gathered so every process
   sees all selected columns; rows must follow the original row partitioning
   (no redistribution is performed).  The result is a new (or reused) MPI
   dense matrix assembled column by column from the local array. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* column icol[i] of the local array; rows are addressed relative to rstart.
       NOTE(review): bv is filled contiguously, which assumes the target's
       leading dimension equals nrows -- true for a freshly preallocated
       matrix, unverified for MAT_REUSE_MATRIX. */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
319 
320 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
321   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
322 
323   PetscFunctionBegin;
324   PetscCall(MatDenseRestoreArray(a->A, array));
325   PetscFunctionReturn(0);
326 }
327 
328 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
329   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
330 
331   PetscFunctionBegin;
332   PetscCall(MatDenseRestoreArrayRead(a->A, array));
333   PetscFunctionReturn(0);
334 }
335 
336 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
337   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
338 
339   PetscFunctionBegin;
340   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
341   PetscFunctionReturn(0);
342 }
343 
344 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
345   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
346   PetscInt      nstash, reallocs;
347 
348   PetscFunctionBegin;
349   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
350 
351   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
352   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
353   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
354   PetscFunctionReturn(0);
355 }
356 
/* Finish assembly: drain the stash messages sent by other processes,
   insert the received values row by row, then assemble the local block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 means no more incoming messages */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* delegate the final assembly to the sequential block */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}
389 
390 PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
391   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
392 
393   PetscFunctionBegin;
394   PetscCall(MatZeroEntries(l->A));
395   PetscFunctionReturn(0);
396 }
397 
398 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
399   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
400   PetscInt      i, len, *lrows;
401 
402   PetscFunctionBegin;
403   /* get locally owned rows */
404   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
405   /* fix right hand side if needed */
406   if (x && b) {
407     const PetscScalar *xx;
408     PetscScalar       *bb;
409 
410     PetscCall(VecGetArrayRead(x, &xx));
411     PetscCall(VecGetArrayWrite(b, &bb));
412     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
413     PetscCall(VecRestoreArrayRead(x, &xx));
414     PetscCall(VecRestoreArrayWrite(b, &bb));
415   }
416   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
417   if (diag != 0.0) {
418     Vec d;
419 
420     PetscCall(MatCreateVecs(A, NULL, &d));
421     PetscCall(VecSet(d, diag));
422     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
423     PetscCall(VecDestroy(&d));
424   }
425   PetscCall(PetscFree(lrows));
426   PetscFunctionReturn(0);
427 }
428 
429 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
430 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
431 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
432 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
433 
434 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
435   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
436   const PetscScalar *ax;
437   PetscScalar       *ay;
438   PetscMemType       axmtype, aymtype;
439 
440   PetscFunctionBegin;
441   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
442   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
443   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
444   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
445   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
446   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
447   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
448   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
449   PetscFunctionReturn(0);
450 }
451 
452 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
453   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
454   const PetscScalar *ax;
455   PetscScalar       *ay;
456   PetscMemType       axmtype, aymtype;
457 
458   PetscFunctionBegin;
459   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
460   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
461   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
462   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
463   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
464   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
465   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
466   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
467   PetscFunctionReturn(0);
468 }
469 
470 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
471   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
472   const PetscScalar *ax;
473   PetscScalar       *ay;
474   PetscMemType       axmtype, aymtype;
475 
476   PetscFunctionBegin;
477   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
478   PetscCall(VecSet(yy, 0.0));
479   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
480   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
481   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
482   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
483   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
484   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
485   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
486   PetscFunctionReturn(0);
487 }
488 
489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
490   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
491   const PetscScalar *ax;
492   PetscScalar       *ay;
493   PetscMemType       axmtype, aymtype;
494 
495   PetscFunctionBegin;
496   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
497   PetscCall(VecCopy(yy, zz));
498   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
499   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
500   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
501   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
502   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
503   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
504   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
505   PetscFunctionReturn(0);
506 }
507 
508 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
509   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
510   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
511   PetscScalar       *x, zero = 0.0;
512   const PetscScalar *av;
513 
514   PetscFunctionBegin;
515   PetscCall(VecSet(v, zero));
516   PetscCall(VecGetArray(v, &x));
517   PetscCall(VecGetSize(v, &n));
518   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
519   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
520   radd = A->rmap->rstart * m;
521   PetscCall(MatDenseGetArrayRead(a->A, &av));
522   PetscCall(MatDenseGetLDA(a->A, &lda));
523   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
524   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
525   PetscCall(VecRestoreArray(v, &x));
526   PetscFunctionReturn(0);
527 }
528 
/* Destroy an MPI dense matrix: free the inner objects, then unregister all
   composed method implementations so no dangling function pointers remain. */
PetscErrorCode MatDestroy_MPIDense(Mat mat) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* outstanding column-vector / submatrix accesses would dangle after destroy */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the inner sequential block and the multiply work objects */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* clear every composed function so the object can be retyped safely */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block, if MatGetDiagonalBlock_MPIDense created one */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}
603 
604 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
605 
606 #include <petscdraw.h>
/* View an MPI dense matrix through an ASCII, draw, or socket viewer.
   INFO formats are handled per process; otherwise the whole matrix is
   gathered onto rank 0 and viewed there as a sequential matrix. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-process local sizes/storage info, no entries */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing to print beyond the generic header */
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* a null draw context means drawing is disabled: skip the gather entirely */
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* push our local rows into A; they end up stashed and land on rank 0 at assembly */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* collective call: every rank must enter Get/RestoreSubViewer, rank 0 views */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
687 
688 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
689   PetscBool iascii, isbinary, isdraw, issocket;
690 
691   PetscFunctionBegin;
692   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
693   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
694   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
695   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
696 
697   if (iascii || issocket || isdraw) {
698     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
699   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
700   PetscFunctionReturn(0);
701 }
702 
703 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
704   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
705   Mat            mdn = mat->A;
706   PetscLogDouble isend[5], irecv[5];
707 
708   PetscFunctionBegin;
709   info->block_size = 1.0;
710 
711   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
712 
713   isend[0] = info->nz_used;
714   isend[1] = info->nz_allocated;
715   isend[2] = info->nz_unneeded;
716   isend[3] = info->memory;
717   isend[4] = info->mallocs;
718   if (flag == MAT_LOCAL) {
719     info->nz_used      = isend[0];
720     info->nz_allocated = isend[1];
721     info->nz_unneeded  = isend[2];
722     info->memory       = isend[3];
723     info->mallocs      = isend[4];
724   } else if (flag == MAT_GLOBAL_MAX) {
725     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
726 
727     info->nz_used      = irecv[0];
728     info->nz_allocated = irecv[1];
729     info->nz_unneeded  = irecv[2];
730     info->memory       = irecv[3];
731     info->mallocs      = irecv[4];
732   } else if (flag == MAT_GLOBAL_SUM) {
733     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
734 
735     info->nz_used      = irecv[0];
736     info->nz_allocated = irecv[1];
737     info->nz_unneeded  = irecv[2];
738     info->memory       = irecv[3];
739     info->mallocs      = irecv[4];
740   }
741   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
742   info->fill_ratio_needed = 0;
743   info->factor_mallocs    = 0;
744   PetscFunctionReturn(0);
745 }
746 
/* Sets an option on the parallel dense matrix. Options that affect value
   insertion are forwarded to the local sequential block; options that only
   make sense for sparse formats are ignored with an informational message. */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded verbatim to the local block */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* remembered here as well because MatSetValues_MPIDense consults it */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
784 
/* Scales A in place as diag(ll) * A * diag(rr); either vector may be NULL to
   skip that side. The local dense storage is column-major with leading
   dimension lda, so rows are walked with stride lda and columns contiguously. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    /* left scaling: row i multiplied by ll[i]; ll is distributed like the rows of A */
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      /* walk across row i: consecutive columns are lda apart */
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    /* right scaling: column i multiplied by rr[i]; rr is distributed like the
       columns of A, so its entries are first gathered into mdn->lvec via the SF */
    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      /* column i is contiguous starting at offset i*lda */
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}
832 
833 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
834   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
835   PetscInt           i, j;
836   PetscMPIInt        size;
837   PetscReal          sum = 0.0;
838   const PetscScalar *av, *v;
839 
840   PetscFunctionBegin;
841   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
842   v = av;
843   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
844   if (size == 1) {
845     PetscCall(MatNorm(mdn->A, type, nrm));
846   } else {
847     if (type == NORM_FROBENIUS) {
848       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
849         sum += PetscRealPart(PetscConj(*v) * (*v));
850         v++;
851       }
852       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
853       *nrm = PetscSqrtReal(*nrm);
854       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
855     } else if (type == NORM_1) {
856       PetscReal *tmp, *tmp2;
857       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
858       *nrm = 0.0;
859       v    = av;
860       for (j = 0; j < mdn->A->cmap->n; j++) {
861         for (i = 0; i < mdn->A->rmap->n; i++) {
862           tmp[j] += PetscAbsScalar(*v);
863           v++;
864         }
865       }
866       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
867       for (j = 0; j < A->cmap->N; j++) {
868         if (tmp2[j] > *nrm) *nrm = tmp2[j];
869       }
870       PetscCall(PetscFree2(tmp, tmp2));
871       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
872     } else if (type == NORM_INFINITY) { /* max row norm */
873       PetscReal ntemp;
874       PetscCall(MatNorm(mdn->A, type, &ntemp));
875       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
876     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
877   }
878   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
879   PetscFunctionReturn(0);
880 }
881 
882 PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
883   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
884   Mat           B;
885   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
886   PetscInt      j, i, lda;
887   PetscScalar  *v;
888 
889   PetscFunctionBegin;
890   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
891   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
892     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
893     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
894     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
895     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
896   } else B = *matout;
897 
898   m = a->A->rmap->n;
899   n = a->A->cmap->n;
900   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
901   PetscCall(MatDenseGetLDA(a->A, &lda));
902   PetscCall(PetscMalloc1(m, &rwork));
903   for (i = 0; i < m; i++) rwork[i] = rstart + i;
904   for (j = 0; j < n; j++) {
905     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
906     v += lda;
907   }
908   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
909   PetscCall(PetscFree(rwork));
910   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
911   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
912   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
913     *matout = B;
914   } else {
915     PetscCall(MatHeaderMerge(A, &B));
916   }
917   PetscFunctionReturn(0);
918 }
919 
920 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
921 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
922 
/* Finalizes the row/column layouts and, if the user has not preallocated,
   creates the local dense storage with PETSc-managed memory. */
PetscErrorCode MatSetUp_MPIDense(Mat A) {
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}
930 
/* Y = Y + alpha*X, delegated to the local sequential blocks
   (assumes X and Y have conforming parallel layouts, as MatAXPY requires). */
PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}
938 
/* Replaces every entry with its complex conjugate (delegated to the local block). */
PetscErrorCode MatConjugate_MPIDense(Mat mat) {
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}
946 
/* Replaces every entry with its real part (delegated to the local block). */
PetscErrorCode MatRealPart_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}
954 
/* Replaces every entry with its imaginary part (delegated to the local block). */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}
962 
/* Extracts global column 'col' into v by dispatching to the local block's
   getcolumnvector implementation; errors if the local block is missing or
   does not provide the operation. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}
972 
973 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
974 
975 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
976   PetscInt      i, m, n;
977   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
978   PetscReal    *work;
979 
980   PetscFunctionBegin;
981   PetscCall(MatGetSize(A, &m, &n));
982   PetscCall(PetscMalloc1(n, &work));
983   if (type == REDUCTION_MEAN_REALPART) {
984     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
985   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
986     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
987   } else {
988     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
989   }
990   if (type == NORM_2) {
991     for (i = 0; i < n; i++) work[i] *= work[i];
992   }
993   if (type == NORM_INFINITY) {
994     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
995   } else {
996     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
997   }
998   PetscCall(PetscFree(work));
999   if (type == NORM_2) {
1000     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1001   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1002     for (i = 0; i < n; i++) reductions[i] /= m;
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #if defined(PETSC_HAVE_CUDA)
/* A = A + alpha*I for the CUDA parallel dense matrix: the diagonal entries
   in the locally owned row range [rstart, rend) are updated directly in the
   device array without host transfers. */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}
1020 
/* Exposes column 'col' of A as an MPI CUDA vector aliasing the matrix's
   device storage (no copy). Must be paired with
   MatDenseRestoreColumnVec(); only one column may be checked out at a time. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the reusable column vector; its array is attached below */
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  /* stored as col+1 so 0 can mean "no column checked out" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column 'col' starts at device offset col*lda */
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1036 
1037 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1038   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1039 
1040   PetscFunctionBegin;
1041   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1042   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1043   a->vecinuse = 0;
1044   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1045   PetscCall(VecCUDAResetArray(a->cvec));
1046   if (v) *v = NULL;
1047   PetscFunctionReturn(0);
1048 }
1049 
/* Read-only variant of MatDenseGetColumnVec: aliases column 'col' of the
   device storage and additionally read-locks the vector so callers cannot
   modify it. Must be paired with MatDenseRestoreColumnVecRead(). */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the reusable column vector; its array is attached below */
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  /* stored as col+1 so 0 can mean "no column checked out" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1066 
/* Returns the read-only column vector: pops the read lock, detaches the
   aliased device array, and releases the matrix for further access. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1080 
/* Write-only variant of MatDenseGetColumnVec: aliases column 'col' of the
   device storage obtained with write access (existing values need not be
   valid). Must be paired with MatDenseRestoreColumnVecWrite(). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the reusable column vector; its array is attached below */
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  /* stored as col+1 so 0 can mean "no column checked out" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1096 
/* Returns the write-only column vector: detaches the aliased device array
   (marking the write complete) and releases the matrix for further access. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1109 
/* Temporarily replaces the local block's device array with user memory 'a';
   refused while a column vector or submatrix is checked out. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}
1119 
/* Restores the device array saved by a previous MatDenseCUDAPlaceArray();
   refused while a column vector or submatrix is checked out. */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}
1129 
/* Permanently replaces the local block's device array with 'a' (no restore);
   refused while a column vector or submatrix is checked out. */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}
1139 
/* Write access to the local block's device array (pass-through to the local block). */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}
1147 
/* Ends write access started by MatDenseCUDAGetArrayWrite (pass-through). */
static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}
1155 
/* Read-only access to the local block's device array (pass-through). */
static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}
1163 
/* Ends read access started by MatDenseCUDAGetArrayRead (pass-through). */
static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}
1171 
/* Read-write access to the local block's device array (pass-through). */
static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}
1179 
/* Ends read-write access started by MatDenseCUDAGetArray (pass-through). */
static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}
1187 
1188 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1189 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1190 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1191 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1192 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1193 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1194 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1195 
/* Binds the matrix to the CPU (bind == PETSC_TRUE) or releases it to run on
   the GPU: forwards the binding to the local block and swaps the composed
   column-vector accessors and the shift implementation between the CPU
   (_MPIDense) and CUDA (_MPIDenseCUDA) variants. Cached helper objects of
   the wrong memory type are destroyed so they are recreated on next use. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* going to the GPU: use CURAND for random numbers and drop cached
       helper objects that are not already CUDA-typed */
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    /* binding to the CPU: restore host random type and host accessors */
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  /* propagate the binding to the cached "get submatrix" matrix, if any */
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}
1234 
/* Preallocates storage for a MATMPIDENSECUDA matrix. If d_data is NULL the
   local CUDA block allocates its own device memory; otherwise the caller's
   array is used (presumably device memory — it is handed straight to
   MatSeqDenseCUDASetPreallocation). Silently returns when A is not of type
   MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* the local block holds the locally owned rows and ALL global columns */
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1255 #endif
1256 
/* Fills the matrix with random values by delegating to the local block;
   mirrors the local block's offload mask so host/device coherence state
   stays consistent on device builds. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(0);
}
1267 
/* A dense matrix stores every entry, so a diagonal entry can never be
   missing; 'd' is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1273 
1274 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1275 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1276 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1277 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1278 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1279 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1280 
1281 /* -------------------------------------------------------------------*/
1282 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1283                                        MatGetRow_MPIDense,
1284                                        MatRestoreRow_MPIDense,
1285                                        MatMult_MPIDense,
1286                                        /*  4*/ MatMultAdd_MPIDense,
1287                                        MatMultTranspose_MPIDense,
1288                                        MatMultTransposeAdd_MPIDense,
1289                                        NULL,
1290                                        NULL,
1291                                        NULL,
1292                                        /* 10*/ NULL,
1293                                        NULL,
1294                                        NULL,
1295                                        NULL,
1296                                        MatTranspose_MPIDense,
1297                                        /* 15*/ MatGetInfo_MPIDense,
1298                                        MatEqual_MPIDense,
1299                                        MatGetDiagonal_MPIDense,
1300                                        MatDiagonalScale_MPIDense,
1301                                        MatNorm_MPIDense,
1302                                        /* 20*/ MatAssemblyBegin_MPIDense,
1303                                        MatAssemblyEnd_MPIDense,
1304                                        MatSetOption_MPIDense,
1305                                        MatZeroEntries_MPIDense,
1306                                        /* 24*/ MatZeroRows_MPIDense,
1307                                        NULL,
1308                                        NULL,
1309                                        NULL,
1310                                        NULL,
1311                                        /* 29*/ MatSetUp_MPIDense,
1312                                        NULL,
1313                                        NULL,
1314                                        MatGetDiagonalBlock_MPIDense,
1315                                        NULL,
1316                                        /* 34*/ MatDuplicate_MPIDense,
1317                                        NULL,
1318                                        NULL,
1319                                        NULL,
1320                                        NULL,
1321                                        /* 39*/ MatAXPY_MPIDense,
1322                                        MatCreateSubMatrices_MPIDense,
1323                                        NULL,
1324                                        MatGetValues_MPIDense,
1325                                        MatCopy_MPIDense,
1326                                        /* 44*/ NULL,
1327                                        MatScale_MPIDense,
1328                                        MatShift_MPIDense,
1329                                        NULL,
1330                                        NULL,
1331                                        /* 49*/ MatSetRandom_MPIDense,
1332                                        NULL,
1333                                        NULL,
1334                                        NULL,
1335                                        NULL,
1336                                        /* 54*/ NULL,
1337                                        NULL,
1338                                        NULL,
1339                                        NULL,
1340                                        NULL,
1341                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1342                                        MatDestroy_MPIDense,
1343                                        MatView_MPIDense,
1344                                        NULL,
1345                                        NULL,
1346                                        /* 64*/ NULL,
1347                                        NULL,
1348                                        NULL,
1349                                        NULL,
1350                                        NULL,
1351                                        /* 69*/ NULL,
1352                                        NULL,
1353                                        NULL,
1354                                        NULL,
1355                                        NULL,
1356                                        /* 74*/ NULL,
1357                                        NULL,
1358                                        NULL,
1359                                        NULL,
1360                                        NULL,
1361                                        /* 79*/ NULL,
1362                                        NULL,
1363                                        NULL,
1364                                        NULL,
1365                                        /* 83*/ MatLoad_MPIDense,
1366                                        NULL,
1367                                        NULL,
1368                                        NULL,
1369                                        NULL,
1370                                        NULL,
1371                                        /* 89*/ NULL,
1372                                        NULL,
1373                                        NULL,
1374                                        NULL,
1375                                        NULL,
1376                                        /* 94*/ NULL,
1377                                        NULL,
1378                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1379                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1380                                        NULL,
1381                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1382                                        NULL,
1383                                        NULL,
1384                                        MatConjugate_MPIDense,
1385                                        NULL,
1386                                        /*104*/ NULL,
1387                                        MatRealPart_MPIDense,
1388                                        MatImaginaryPart_MPIDense,
1389                                        NULL,
1390                                        NULL,
1391                                        /*109*/ NULL,
1392                                        NULL,
1393                                        NULL,
1394                                        MatGetColumnVector_MPIDense,
1395                                        MatMissingDiagonal_MPIDense,
1396                                        /*114*/ NULL,
1397                                        NULL,
1398                                        NULL,
1399                                        NULL,
1400                                        NULL,
1401                                        /*119*/ NULL,
1402                                        NULL,
1403                                        NULL,
1404                                        NULL,
1405                                        NULL,
1406                                        /*124*/ NULL,
1407                                        MatGetColumnReductions_MPIDense,
1408                                        NULL,
1409                                        NULL,
1410                                        NULL,
1411                                        /*129*/ NULL,
1412                                        NULL,
1413                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1414                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1415                                        NULL,
1416                                        /*134*/ NULL,
1417                                        NULL,
1418                                        NULL,
1419                                        NULL,
1420                                        NULL,
1421                                        /*139*/ NULL,
1422                                        NULL,
1423                                        NULL,
1424                                        NULL,
1425                                        NULL,
1426                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1427                                        /*145*/ NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        NULL,
1431                                        NULL,
1432                                        /*150*/ NULL};
1433 
/* Type-specific implementation of MatMPIDenseSetPreallocation(): (lazily) creates the
   inner sequential dense matrix holding this process's rows and preallocates its storage.
   data may be NULL, in which case PETSc allocates the storage itself. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  /* Re-preallocating while a submatrix from MatDenseGetSubMatrix() is outstanding would
     invalidate the borrowed inner matrix, so forbid it */
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* Each process stores its local rows densely: mat->rmap->n rows by all cmap->N columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* Keep the inner sequential matrix type consistent with the outer type (CUDA vs host) */
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = a->A->offloadmask; /* mirror the inner matrix's host/device residency */
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1456 
/* Convert a parallel AIJ matrix to parallel dense: merge each process's local rows into
   one sequential AIJ, densify it, then concatenate the sequential pieces across the
   communicator. Honors all three MatReuse modes. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  /* C is a freshly created sequential AIJ holding this process's rows (all global columns) */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  /* Reuse C as the output handle: fill the caller's matrix in place for MAT_REUSE_MATRIX,
     otherwise (INITIAL or INPLACE) let the concatenation create a new matrix */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C)); /* swap the new contents into A, destroying the old */
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1474 
/* Convert a parallel dense matrix to parallel AIJ: sparsify the local dense block, then
   concatenate the sequential pieces across the communicator. Mirrors
   MatConvert_MPIAIJ_MPIDense() but in the opposite direction. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  /* C is borrowed from A (MatDenseGetLocalMatrix() returns the inner matrix),
     so unlike the AIJ->Dense direction it must NOT be destroyed here */
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C)); /* swap the new contents into A, destroying the old */
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1491 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format by pushing each process's local
   rows (all global columns) into the Elemental matrix with MatSetValues(). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* entries are accumulated with ADD_VALUES below, so a reused matrix must be zeroed first */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* dense storage is column major; tell MatSetValues() to interpret v accordingly */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* index arrays: every global column, and this process's owned global rows */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  /* NOTE(review): A is only read here; MatDenseGetArrayRead() would avoid flagging A as
     modified — confirm no caller relies on the state bump before changing */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1530 
1531 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
1532   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1533 
1534   PetscFunctionBegin;
1535   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
1540   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1541 
1542   PetscFunctionBegin;
1543   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1544   PetscFunctionReturn(0);
1545 }
1546 
/* Concatenate the per-process sequential dense matrices inmat (which all share the global
   column space) into one parallel dense matrix on comm. n is this process's local column
   count (or PETSC_DECIDE). Collective: the symbolic phase checks sum(n) == N. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    /* every process sets only its own rows, so stash communication can be skipped */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy this process's sequential piece into the inner local matrix */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}
1571 
#if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix to plain host MATMPIDENSE: bind work to the CPU,
   restore the host default vector type, and strip all CUDA-specific composed methods. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX takes neither branch: *newmat is expected to already alias M */

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  /* composing NULL removes the CUDA-only methods installed by the CUDA conversion */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
  /* convert the inner sequential matrix to its host type as well */
  m = (Mat_MPIDense *)(B)->data;
  if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU; /* data now lives on the host only */
  PetscFunctionReturn(0);
}
1609 
/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA: switch the default vector type
   to VECCUDA, install the CUDA-specific composed methods, and convert the inner
   sequential matrix to MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX takes neither branch: *newmat is expected to already alias M */

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
  /* install CUDA implementations; MatConvert_MPIDenseCUDA_MPIDense() removes them again */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH; /* values currently valid on both host and device */
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; /* no storage yet; preallocation will decide */
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif
1652 
1653 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1654   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1655   PetscInt      lda;
1656 
1657   PetscFunctionBegin;
1658   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1659   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1660   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1661   a->vecinuse = col + 1;
1662   PetscCall(MatDenseGetLDA(a->A, &lda));
1663   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1664   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1665   *v = a->cvec;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1670   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1671 
1672   PetscFunctionBegin;
1673   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1674   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1675   a->vecinuse = 0;
1676   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1677   PetscCall(VecResetArray(a->cvec));
1678   if (v) *v = NULL;
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1683   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1684   PetscInt      lda;
1685 
1686   PetscFunctionBegin;
1687   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1688   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1689   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1690   a->vecinuse = col + 1;
1691   PetscCall(MatDenseGetLDA(a->A, &lda));
1692   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1693   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1694   PetscCall(VecLockReadPush(a->cvec));
1695   *v = a->cvec;
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1700   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1701 
1702   PetscFunctionBegin;
1703   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1704   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1705   a->vecinuse = 0;
1706   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1707   PetscCall(VecLockReadPop(a->cvec));
1708   PetscCall(VecResetArray(a->cvec));
1709   if (v) *v = NULL;
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1714   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1715   PetscInt      lda;
1716 
1717   PetscFunctionBegin;
1718   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1719   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1720   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1721   a->vecinuse = col + 1;
1722   PetscCall(MatDenseGetLDA(a->A, &lda));
1723   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1724   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1725   *v = a->cvec;
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1730   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1731 
1732   PetscFunctionBegin;
1733   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1734   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1735   a->vecinuse = 0;
1736   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1737   PetscCall(VecResetArray(a->cvec));
1738   if (v) *v = NULL;
1739   PetscFunctionReturn(0);
1740 }
1741 
/* Hand out the dense submatrix with global rows [rbegin,rend) and columns [cbegin,cend),
   sharing A's storage. Only one submatrix may be outstanding; pair with
   MatDenseRestoreSubMatrix_MPIDense(). Collective over A's communicator. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* intersect the requested global row window with this process's ownership range,
     expressed as local row indices into the inner matrix */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached submatrix shell with the requested layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row space requested: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached shell exists: rebuild only the layouts that no longer match the request */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* global size matches but the layout was process-dependent: agree collectively */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the inner sequential submatrix borrows storage from a->A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask; /* mirror the inner matrix's residency */
#endif
  a->matinuse = cbegin + 1; /* offset by one so zero means "nothing checked out" */
  *v          = a->cmat;
  PetscFunctionReturn(0);
}
1798 
/* Return the submatrix checked out with MatDenseGetSubMatrix_MPIDense(): release the
   inner borrowed storage and clear the outstanding-submatrix flag. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); /* c->A becomes NULL again */
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask; /* the submatrix may have moved data between memories */
#endif
  PetscFunctionReturn(0);
}
1816 
1817 /*MC
1818    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1819 
1820    Options Database Keys:
1821 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1822 
1823   Level: beginner
1824 
1825 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1826 M*/
/* Constructor for MATMPIDENSE: allocates the type-specific data, installs the ops table,
   and composes the dense-specific methods dispatched through PetscObject. The inner
   sequential matrix a->A is created later by MatMPIDenseSetPreallocation_MPIDense(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array access, column-vector and submatrix checkout methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other parallel formats */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product dispatch entries for AIJ/dense combinations */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}
1890 
1891 /*MC
1892    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1893 
1894    Options Database Keys:
1895 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
1896 
1897   Level: beginner
1898 
1899 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1900 M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
/* Constructor for MATMPIDENSECUDA: build a host MPIDense matrix, then convert it in
   place to the CUDA variant (installs CUDA methods and the VECCUDA default vector type). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* ensure the CUDA device is initialized */
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
#endif
1911 
1912 /*MC
1913    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1914 
1915    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1916    and `MATMPIDENSE` otherwise.
1917 
1918    Options Database Keys:
1919 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1920 
1921   Level: beginner
1922 
1923 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1924 M*/
1925 
1926 /*MC
1927    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1928 
1929    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
1930    and `MATMPIDENSECUDA` otherwise.
1931 
1932    Options Database Keys:
1933 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
1934 
1935   Level: beginner
1936 
1937 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1938 M*/
1939 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: dispatches to the type's implementation if composed; no-op otherwise */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}
1968 
/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not provide an implementation */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* values have (potentially) changed */
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the user-provided array lives on the host */
#endif
  PetscFunctionReturn(0);
}
1999 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* restore the array that was saved by MatDensePlaceArray(); errors if the type does not support it */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* the visible matrix entries changed back, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2022 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* permanently swap in the new array; ownership transfers to the matrix (freed on destroy) */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the replacement array is host memory, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2052 
2053 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* dispatch to the CUDA implementation; errors if the type does not support it */
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array is device memory, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2081 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDenseCUDAPlaceArray()`

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* restore the device array that was saved by MatDenseCUDAPlaceArray() */
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  /* the visible matrix entries changed back, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2104 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* permanently swap in the new device array; ownership transfers to the matrix */
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array is device memory, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2133 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* write access: no host-to-device sync is performed by the implementation */
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* caller may write through the pointer, so bump the object state now */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2161 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the device buffer was (potentially) written, so the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2183 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  /* read-only access: the matrix entries are unchanged, so no PetscObjectStateIncrease() here */
  PetscFunctionReturn(0);
}
2208 
2209 /*@C
2210    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2211 
2212    Not Collective
2213 
2214    Input Parameters:
2215 +  A - the matrix
2216 -  array - the GPU array in column major order
2217 
2218    Note:
2219    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2220 
2221    Level: developer
2222 
2223 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2224 @*/
2225 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) {
2226   PetscFunctionBegin;
2227   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2228   PetscFunctionReturn(0);
2229 }
2230 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* caller may write through the pointer, so bump the object state now */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2256 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the device buffer was (potentially) written, so the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2278 #endif
2279 
2280 /*@C
2281    MatCreateDense - Creates a matrix in `MATDENSE` format.
2282 
2283    Collective
2284 
2285    Input Parameters:
2286 +  comm - MPI communicator
2287 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2288 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2289 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2290 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2291 -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2292    to control all matrix memory allocation.
2293 
2294    Output Parameter:
2295 .  A - the matrix
2296 
2297    Notes:
2298    The dense format is fully compatible with standard Fortran 77
2299    storage by columns.
2300 
2301    Although local portions of the matrix are stored in column-major
2302    order, the matrix is partitioned across MPI ranks by row.
2303 
2304    The data input variable is intended primarily for Fortran programmers
2305    who wish to allocate their own matrix memory space.  Most users should
2306    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
2307 
2308    The user MUST specify either the local or global matrix dimensions
2309    (possibly both).
2310 
2311    Level: intermediate
2312 
2313 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2314 @*/
2315 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
2316   PetscFunctionBegin;
2317   PetscCall(MatCreate(comm, A));
2318   PetscCall(MatSetSizes(*A, m, n, M, N));
2319   PetscCall(MatSetType(*A, MATDENSE));
2320   PetscCall(MatSeqDenseSetPreallocation(*A, data));
2321   PetscCall(MatMPIDenseSetPreallocation(*A, data));
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* data must be NULL on all ranks or non-NULL on all ranks, since both preallocation routines consume it */
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  /* exactly one of these takes effect, depending on whether the matrix resolved to seq or mpi densecuda */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
2357 #endif
2358 
2359 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) {
2360   Mat           mat;
2361   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
2362 
2363   PetscFunctionBegin;
2364   *newmat = NULL;
2365   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
2366   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2367   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
2368   a = (Mat_MPIDense *)mat->data;
2369 
2370   mat->factortype   = A->factortype;
2371   mat->assembled    = PETSC_TRUE;
2372   mat->preallocated = PETSC_TRUE;
2373 
2374   mat->insertmode = NOT_SET_VALUES;
2375   a->donotstash   = oldmat->donotstash;
2376 
2377   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2378   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2379 
2380   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2381 
2382   *newmat = mat;
2383   PetscFunctionReturn(0);
2384 }
2385 
/* Load an MPI dense matrix from a viewer; supports binary viewers and, when
   PETSc is configured with HDF5, HDF5 viewers. Errors on any other viewer type. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) {
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  /* dispatch on the viewer type; the HDF5 branch only exists in HDF5-enabled builds */
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2410 
2411 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
2412   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2413   Mat           a, b;
2414 
2415   PetscFunctionBegin;
2416   a = matA->A;
2417   b = matB->A;
2418   PetscCall(MatEqual(a, b, flag));
2419   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
2424   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2425 
2426   PetscFunctionBegin;
2427   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2428   PetscCall(MatDestroy(&atb->atb));
2429   PetscCall(PetscFree(atb));
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
2434   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2435 
2436   PetscFunctionBegin;
2437   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2438   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2439   PetscCall(PetscFree(abt));
2440   PetscFunctionReturn(0);
2441 }
2442 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the
   full-sized local product (A_loc)^T * B_loc, packs it by destination rank,
   and an MPI_Reduce_scatter sums and distributes the row blocks of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf */
  /* pack column-major blocks grouped by the rank that owns each row range of C */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2486 
/* Symbolic phase of C = A^T * B for MPI dense matrices: checks that A and B
   share a row layout, sizes up C, and allocates the reduce-scatter send buffer
   and receive counts used by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to have identical row distributions */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the whole cM x cN local product; recvcounts has one entry per rank */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2522 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sizes up C, selects
   between the "allgatherv" (alg 0) and "cyclic" (alg 1) algorithms, and
   allocates the buffers each one needs for the numeric phase. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic algorithm's messages separate from other traffic on this object */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two buffers, each large enough for the biggest row block of B times A's global column count */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds local B^T, buf[1] receives the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2573 
/* Numeric phase of C = A * B^T using the "cyclic" algorithm: the row blocks of
   B circulate around the ranks with nonblocking sends/receives while each rank
   overlaps a local GEMM against the block it currently holds. After `size`
   steps every rank has multiplied against all of B. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local blocks, narrowed to BLAS integer width */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if B's local block is contiguous (lda == local rows) send it in place, else compact it into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2655 
2656 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) {
2657   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2658   Mat_MatTransMultDense *abt;
2659   MPI_Comm               comm;
2660   PetscMPIInt            size;
2661   PetscScalar           *cv, *sendbuf, *recvbuf;
2662   const PetscScalar     *av, *bv;
2663   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2664   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2665   PetscBLASInt           cm, cn, ck, alda, clda;
2666 
2667   PetscFunctionBegin;
2668   MatCheckProduct(C, 3);
2669   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2670   abt = (Mat_MatTransMultDense *)C->product->data;
2671   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2672   PetscCallMPI(MPI_Comm_size(comm, &size));
2673   PetscCall(MatDenseGetArrayRead(a->A, &av));
2674   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2675   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2676   PetscCall(MatDenseGetLDA(a->A, &i));
2677   PetscCall(PetscBLASIntCast(i, &alda));
2678   PetscCall(MatDenseGetLDA(b->A, &blda));
2679   PetscCall(MatDenseGetLDA(c->A, &i));
2680   PetscCall(PetscBLASIntCast(i, &clda));
2681   /* copy transpose of B into buf[0] */
2682   bn      = B->rmap->n;
2683   sendbuf = abt->buf[0];
2684   recvbuf = abt->buf[1];
2685   for (k = 0, j = 0; j < bn; j++) {
2686     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2687   }
2688   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2689   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2690   PetscCall(PetscBLASIntCast(cK, &ck));
2691   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2692   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2693   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2694   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2695   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2696   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2697   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2698   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2699   PetscFunctionReturn(0);
2700 }
2701 
2702 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2703   Mat_MatTransMultDense *abt;
2704 
2705   PetscFunctionBegin;
2706   MatCheckProduct(C, 3);
2707   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2708   abt = (Mat_MatTransMultDense *)C->product->data;
2709   switch (abt->alg) {
2710   case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break;
2711   default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break;
2712   }
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) {
2717   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2718 
2719   PetscFunctionBegin;
2720   PetscCall(MatDestroy(&ab->Ce));
2721   PetscCall(MatDestroy(&ab->Ae));
2722   PetscCall(MatDestroy(&ab->Be));
2723   PetscCall(PetscFree(ab));
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 #if defined(PETSC_HAVE_ELEMENTAL)
2728 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2729   Mat_MatMultDense *ab;
2730 
2731   PetscFunctionBegin;
2732   MatCheckProduct(C, 3);
2733   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2734   ab = (Mat_MatMultDense *)C->product->data;
2735   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2736   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2737   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2738   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2743   Mat               Ae, Be, Ce;
2744   Mat_MatMultDense *ab;
2745 
2746   PetscFunctionBegin;
2747   MatCheckProduct(C, 4);
2748   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2749   /* check local size of A and B */
2750   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2751     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2752   }
2753 
2754   /* create elemental matrices Ae and Be */
2755   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2756   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2757   PetscCall(MatSetType(Ae, MATELEMENTAL));
2758   PetscCall(MatSetUp(Ae));
2759   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2760 
2761   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2762   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2763   PetscCall(MatSetType(Be, MATELEMENTAL));
2764   PetscCall(MatSetUp(Be));
2765   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2766 
2767   /* compute symbolic Ce = Ae*Be */
2768   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2769   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2770 
2771   /* setup C */
2772   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2773   PetscCall(MatSetType(C, MATDENSE));
2774   PetscCall(MatSetUp(C));
2775 
2776   /* create data structure for reuse Cdense */
2777   PetscCall(PetscNew(&ab));
2778   ab->Ae = Ae;
2779   ab->Be = Be;
2780   ab->Ce = Ce;
2781 
2782   C->product->data       = ab;
2783   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2784   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2785   PetscFunctionReturn(0);
2786 }
2787 #endif
2788 /* ----------------------------------------------- */
2789 #if defined(PETSC_HAVE_ELEMENTAL)
2790 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
2791   PetscFunctionBegin;
2792   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2793   C->ops->productsymbolic = MatProductSymbolic_AB;
2794   PetscFunctionReturn(0);
2795 }
2796 #endif
2797 
2798 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
2799   Mat_Product *product = C->product;
2800   Mat          A = product->A, B = product->B;
2801 
2802   PetscFunctionBegin;
2803   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2804     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2805   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2806   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2807   PetscFunctionReturn(0);
2808 }
2809 
/* Configure the product C = A*B^t for MATMPIDENSE operands: choose between the
   "allgatherv" (default) and "cyclic" algorithms, honoring the user's runtime
   option, then install the symbolic-phase callbacks on C. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  /* only overwrite product->alg when the user left it at "default" */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name depends on whether the user came through
     the MatMatTransposeMult() API or the MatProduct interface */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  /* flg now reflects whether the option was actually set on the command line */
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2838 
2839 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
2840   Mat_Product *product = C->product;
2841 
2842   PetscFunctionBegin;
2843   switch (product->type) {
2844 #if defined(PETSC_HAVE_ELEMENTAL)
2845   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
2846 #endif
2847   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
2848   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
2849   default: break;
2850   }
2851   PetscFunctionReturn(0);
2852 }
2853