xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
25   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
26   PetscBool     flg;
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
30   PetscValidPointer(B, 2);
31   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
32   if (flg) *B = mat->A;
33   else {
34     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
35     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
42   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
43   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
44 
45   PetscFunctionBegin;
46   PetscCall(MatCopy(Amat->A, Bmat->A, s));
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
51   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
52   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
53   PetscScalar  *v;
54 
55   PetscFunctionBegin;
56   PetscCall(MatDenseGetArray(mat->A, &v));
57   PetscCall(MatDenseGetLDA(mat->A, &lda));
58   rend2 = PetscMin(rend, A->cmap->N);
59   if (rend2 > rstart) {
60     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
61     PetscCall(PetscLogFlops(rend2 - rstart));
62   }
63   PetscCall(MatDenseRestoreArray(mat->A, &v));
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
68   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
69   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
70 
71   PetscFunctionBegin;
72   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
73   lrow = row - rstart;
74   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
79   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
80   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
81 
82   PetscFunctionBegin;
83   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
84   lrow = row - rstart;
85   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
86   PetscFunctionReturn(0);
87 }
88 
/* Return (caching it on A under the key "DiagonalBlock") a sequential dense matrix
   that aliases the storage of the locally owned diagonal block of A.
   Requires congruent row/column layouts (square partitioning). */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  /* Reuse the cached block if one was created by a previous call */
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases A's local storage starting at column rstart.
       NOTE(review): the offset m*rstart assumes the inner block has lda == m
       (column-major with no padding) — confirm for matrices with a custom lda */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing takes a reference; the local reference is dropped right after */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
118 
/* Insert or add a logically dense sub-block of values. Locally owned rows go straight
   into the sequential block; off-process rows are queued in the stash for exchange at
   assembly time (unless stashing is disabled). Negative indices are skipped. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row: silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) { /* locally owned row */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: entry (i,j) is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix as needing reassembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
150 
151 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
152   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
153   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
154 
155   PetscFunctionBegin;
156   for (i = 0; i < m; i++) {
157     if (idxm[i] < 0) continue; /* negative row */
158     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
159     if (idxm[i] >= rstart && idxm[i] < rend) {
160       row = idxm[i] - rstart;
161       for (j = 0; j < n; j++) {
162         if (idxn[j] < 0) continue; /* negative column */
163         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
164         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
165       }
166     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
172   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
173 
174   PetscFunctionBegin;
175   PetscCall(MatDenseGetLDA(a->A, lda));
176   PetscFunctionReturn(0);
177 }
178 
/* Set the leading dimension of the local dense block, lazily creating the block
   (SEQDENSE, or SEQDENSECUDA to match an MPIDENSECUDA parent) if it does not exist. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    /* inner matrix not created yet: finalize layouts and build an empty local block */
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A));
    /* the local block stores all N global columns of the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}
197 
198 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
199   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
200 
201   PetscFunctionBegin;
202   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
203   PetscCall(MatDenseGetArray(a->A, array));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
208   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
209 
210   PetscFunctionBegin;
211   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
212   PetscCall(MatDenseGetArrayRead(a->A, array));
213   PetscFunctionReturn(0);
214 }
215 
216 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
217   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
218 
219   PetscFunctionBegin;
220   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
221   PetscCall(MatDenseGetArrayWrite(a->A, array));
222   PetscFunctionReturn(0);
223 }
224 
225 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
226   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
227 
228   PetscFunctionBegin;
229   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
230   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
231   PetscCall(MatDensePlaceArray(a->A, array));
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
236   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
237 
238   PetscFunctionBegin;
239   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
240   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
241   PetscCall(MatDenseResetArray(a->A));
242   PetscFunctionReturn(0);
243 }
244 
245 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
246   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
247 
248   PetscFunctionBegin;
249   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
250   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
251   PetscCall(MatDenseReplaceArray(a->A, array));
252   PetscFunctionReturn(0);
253 }
254 
/* Extract the submatrix given by (isrow, iscol) into *B. The row index set must match
   the partitioning of A; the column index set is gathered so that every process copies
   all requested columns of its locally owned rows. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full column selection since local rows span all columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* source column icol[i] starts at offset lda*icol[i] (column-major storage) */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
320 
321 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
322   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
323 
324   PetscFunctionBegin;
325   PetscCall(MatDenseRestoreArray(a->A, array));
326   PetscFunctionReturn(0);
327 }
328 
329 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
330   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
331 
332   PetscFunctionBegin;
333   PetscCall(MatDenseRestoreArrayRead(a->A, array));
334   PetscFunctionReturn(0);
335 }
336 
337 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
338   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
339 
340   PetscFunctionBegin;
341   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
342   PetscFunctionReturn(0);
343 }
344 
345 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
346   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
347   PetscInt      nstash, reallocs;
348 
349   PetscFunctionBegin;
350   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
351 
352   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
353   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
354   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
355   PetscFunctionReturn(0);
356 }
357 
/* Finish assembly: drain the stash messages started in MatAssemblyBegin_MPIDense,
   inserting the received values one row-run at a time, then assemble the local block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming stash messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j; /* continue with the next row-run */
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}
390 
391 PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
392   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
393 
394   PetscFunctionBegin;
395   PetscCall(MatZeroEntries(l->A));
396   PetscFunctionReturn(0);
397 }
398 
/* Zero the rows of A whose global indices appear in rows[] (only locally owned entries
   of rows[] are acted on), optionally setting diag on the diagonal of those rows and
   fixing b so that b = diag*x on them. */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block; the diagonal is handled collectively below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* insert diag on the whole diagonal (the zeroed rows now hold diag, others are overwritten with diag too via INSERT on the diagonal vector) */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
429 
430 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
431 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
432 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
433 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
434 
/* y = A*x. Each process stores full rows, so all of x is broadcast into the local
   work vector lvec before applying the sequential block. */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast: gathers the full x on every process (host or device arrays) */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}
452 
/* z = y + A*x. Same communication pattern as MatMult_MPIDense: broadcast x into
   lvec, then apply the sequential block's multadd. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast of the full x onto every process */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}
470 
/* y = A^T*x. The local transpose product is computed into lvec, then partial results
   are summed across processes into y via an SF reduce. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication SF and lvec */
  PetscCall(VecSet(yy, 0.0));                             /* reduce accumulates into yy, so start from zero */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  /* memtype-aware sum-reduction of the per-process partial results */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}
489 
/* z = y + A^T*x. Seeds z with y, computes the local transpose product into lvec,
   then sum-reduces the partial results into z. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication SF and lvec */
  PetscCall(VecCopy(yy, zz));                             /* reduce accumulates on top of yy's values */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  /* memtype-aware sum-reduction of the per-process partial results */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}
508 
509 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
510   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
511   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
512   PetscScalar       *x, zero = 0.0;
513   const PetscScalar *av;
514 
515   PetscFunctionBegin;
516   PetscCall(VecSet(v, zero));
517   PetscCall(VecGetArray(v, &x));
518   PetscCall(VecGetSize(v, &n));
519   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
520   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
521   radd = A->rmap->rstart * m;
522   PetscCall(MatDenseGetArrayRead(a->A, &av));
523   PetscCall(MatDenseGetLDA(a->A, &lda));
524   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
525   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
526   PetscCall(VecRestoreArray(v, &x));
527   PetscFunctionReturn(0);
528 }
529 
/* Destroy the MPI dense matrix: free the stash, the inner sequential block, the
   communication objects, and detach every composed method/query so no dangling
   function pointers remain on the base object. */
PetscErrorCode MatDestroy_MPIDense(Mat mat) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* refuse to destroy while a column vector or submatrix still aliases our storage */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach all composed methods registered at creation/convert time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense (if any) */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}
604 
605 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
606 
607 #include <petscdraw.h>
608 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
609   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
610   PetscMPIInt       rank;
611   PetscViewerType   vtype;
612   PetscBool         iascii, isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
618   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
619   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
620   if (iascii) {
621     PetscCall(PetscViewerGetType(viewer, &vtype));
622     PetscCall(PetscViewerGetFormat(viewer, &format));
623     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
624       MatInfo info;
625       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
626       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
627       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
628                                                    (PetscInt)info.memory));
629       PetscCall(PetscViewerFlush(viewer));
630       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
631       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
632       PetscFunctionReturn(0);
633     } else if (format == PETSC_VIEWER_ASCII_INFO) {
634       PetscFunctionReturn(0);
635     }
636   } else if (isdraw) {
637     PetscDraw draw;
638     PetscBool isnull;
639 
640     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
641     PetscCall(PetscDrawIsNull(draw, &isnull));
642     if (isnull) PetscFunctionReturn(0);
643   }
644 
645   {
646     /* assemble the entire matrix onto first processor. */
647     Mat          A;
648     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
649     PetscInt    *cols;
650     PetscScalar *vals;
651 
652     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
653     if (rank == 0) {
654       PetscCall(MatSetSizes(A, M, N, M, N));
655     } else {
656       PetscCall(MatSetSizes(A, 0, 0, M, N));
657     }
658     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
659     PetscCall(MatSetType(A, MATMPIDENSE));
660     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
661     PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));
662 
663     /* Copy the matrix ... This isn't the most efficient means,
664        but it's quick for now */
665     A->insertmode = INSERT_VALUES;
666 
667     row = mat->rmap->rstart;
668     m   = mdn->A->rmap->n;
669     for (i = 0; i < m; i++) {
670       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
671       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
672       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
673       row++;
674     }
675 
676     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
677     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
678     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
679     if (rank == 0) {
680       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
681       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
682     }
683     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
684     PetscCall(PetscViewerFlush(viewer));
685     PetscCall(MatDestroy(&A));
686   }
687   PetscFunctionReturn(0);
688 }
689 
690 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
691   PetscBool iascii, isbinary, isdraw, issocket;
692 
693   PetscFunctionBegin;
694   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
695   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
696   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
697   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
698 
699   if (iascii || issocket || isdraw) {
700     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
701   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
702   PetscFunctionReturn(0);
703 }
704 
705 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
706   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
707   Mat            mdn = mat->A;
708   PetscLogDouble isend[5], irecv[5];
709 
710   PetscFunctionBegin;
711   info->block_size = 1.0;
712 
713   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
714 
715   isend[0] = info->nz_used;
716   isend[1] = info->nz_allocated;
717   isend[2] = info->nz_unneeded;
718   isend[3] = info->memory;
719   isend[4] = info->mallocs;
720   if (flag == MAT_LOCAL) {
721     info->nz_used      = isend[0];
722     info->nz_allocated = isend[1];
723     info->nz_unneeded  = isend[2];
724     info->memory       = isend[3];
725     info->mallocs      = isend[4];
726   } else if (flag == MAT_GLOBAL_MAX) {
727     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
728 
729     info->nz_used      = irecv[0];
730     info->nz_allocated = irecv[1];
731     info->nz_unneeded  = irecv[2];
732     info->memory       = irecv[3];
733     info->mallocs      = irecv[4];
734   } else if (flag == MAT_GLOBAL_SUM) {
735     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
736 
737     info->nz_used      = irecv[0];
738     info->nz_allocated = irecv[1];
739     info->nz_unneeded  = irecv[2];
740     info->memory       = irecv[3];
741     info->mallocs      = irecv[4];
742   }
743   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
744   info->fill_ratio_needed = 0;
745   info->factor_mallocs    = 0;
746   PetscFunctionReturn(0);
747 }
748 
/* MatSetOption_MPIDense - sets an option on a parallel dense matrix.

   Preallocation-related options are forwarded to the local dense block;
   options that have no meaning for dense storage are logged and ignored;
   unknown options raise an error. */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded verbatim to the sequential block */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* kept both on this object (for MatSetValues) and on the local block */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
786 
/* MatDiagonalScale_MPIDense - computes A = diag(ll) * A * diag(rr).

   ll is distributed like the rows of A, so the left scaling touches only
   local rows. rr is distributed like the columns of A, but each process
   stores all N columns of its local block, so the right-scaling values are
   first gathered into mdn->lvec through the Mvctx star forest. Either
   vector may be NULL to skip that side of the scaling. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* row i is stored with stride lda in the column-major local array */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* gather the distributed rr entries into the local (length-N) vector lvec */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* column i is contiguous starting at offset i*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}
834 
835 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
836   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
837   PetscInt           i, j;
838   PetscMPIInt        size;
839   PetscReal          sum = 0.0;
840   const PetscScalar *av, *v;
841 
842   PetscFunctionBegin;
843   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
844   v = av;
845   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
846   if (size == 1) {
847     PetscCall(MatNorm(mdn->A, type, nrm));
848   } else {
849     if (type == NORM_FROBENIUS) {
850       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
851         sum += PetscRealPart(PetscConj(*v) * (*v));
852         v++;
853       }
854       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
855       *nrm = PetscSqrtReal(*nrm);
856       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
857     } else if (type == NORM_1) {
858       PetscReal *tmp, *tmp2;
859       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
860       *nrm = 0.0;
861       v    = av;
862       for (j = 0; j < mdn->A->cmap->n; j++) {
863         for (i = 0; i < mdn->A->rmap->n; i++) {
864           tmp[j] += PetscAbsScalar(*v);
865           v++;
866         }
867       }
868       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
869       for (j = 0; j < A->cmap->N; j++) {
870         if (tmp2[j] > *nrm) *nrm = tmp2[j];
871       }
872       PetscCall(PetscFree2(tmp, tmp2));
873       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
874     } else if (type == NORM_INFINITY) { /* max row norm */
875       PetscReal ntemp;
876       PetscCall(MatNorm(mdn->A, type, &ntemp));
877       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
878     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
879   }
880   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
881   PetscFunctionReturn(0);
882 }
883 
/* MatTranspose_MPIDense - computes B = A^T for a parallel dense matrix.

   reuse semantics:
     MAT_INITIAL_MATRIX - a new matrix is created and returned in *matout;
     MAT_REUSE_MATRIX   - *matout (with transposed layout) is overwritten;
     MAT_INPLACE_MATRIX - a temporary transpose is built and then merged
                          back into A via MatHeaderMerge.
   The transpose is assembled by inserting each local column of A as one
   global row of B through the generic MatSetValues stash machinery. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B gets the transposed parallel layout: local sizes (cols x rows) of A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of the local rows of A, which become
     the column indices of B */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* local column j of A (contiguous, length m) is global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's and destroy the shell of B */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
921 
922 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
923 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
924 
925 PetscErrorCode MatSetUp_MPIDense(Mat A) {
926   PetscFunctionBegin;
927   PetscCall(PetscLayoutSetUp(A->rmap));
928   PetscCall(PetscLayoutSetUp(A->cmap));
929   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
934   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
935 
936   PetscFunctionBegin;
937   PetscCall(MatAXPY(A->A, alpha, B->A, str));
938   PetscFunctionReturn(0);
939 }
940 
941 PetscErrorCode MatConjugate_MPIDense(Mat mat) {
942   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
943 
944   PetscFunctionBegin;
945   PetscCall(MatConjugate(a->A));
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatRealPart_MPIDense(Mat A) {
950   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
951 
952   PetscFunctionBegin;
953   PetscCall(MatRealPart(a->A));
954   PetscFunctionReturn(0);
955 }
956 
957 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
958   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959 
960   PetscFunctionBegin;
961   PetscCall(MatImaginaryPart(a->A));
962   PetscFunctionReturn(0);
963 }
964 
965 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
966   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
967 
968   PetscFunctionBegin;
969   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
970   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
971   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
972   PetscFunctionReturn(0);
973 }
974 
975 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
976 
977 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
978   PetscInt      i, m, n;
979   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
980   PetscReal    *work;
981 
982   PetscFunctionBegin;
983   PetscCall(MatGetSize(A, &m, &n));
984   PetscCall(PetscMalloc1(n, &work));
985   if (type == REDUCTION_MEAN_REALPART) {
986     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
987   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
988     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
989   } else {
990     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
991   }
992   if (type == NORM_2) {
993     for (i = 0; i < n; i++) work[i] *= work[i];
994   }
995   if (type == NORM_INFINITY) {
996     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
997   } else {
998     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
999   }
1000   PetscCall(PetscFree(work));
1001   if (type == NORM_2) {
1002     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1003   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1004     for (i = 0; i < n; i++) reductions[i] /= m;
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #if defined(PETSC_HAVE_CUDA)
1010 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
1011   PetscScalar *da;
1012   PetscInt     lda;
1013 
1014   PetscFunctionBegin;
1015   PetscCall(MatDenseCUDAGetArray(A, &da));
1016   PetscCall(MatDenseGetLDA(A, &lda));
1017   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1018   PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1019   PetscCall(MatDenseCUDARestoreArray(A, &da));
1020   PetscFunctionReturn(0);
1021 }
1022 
/* MatDenseGetColumnVec_MPIDenseCUDA - returns a read/write Vec aliasing
   global column `col`. A single cached vector (a->cvec) is reused; its
   device array is pointed at the column inside the local CUDA storage.
   Must be paired with MatDenseRestoreColumnVec. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the cached column vector wrapping no array yet */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* non-zero marks the vector as checked out */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major device array */
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1041 
1042 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1043   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1044 
1045   PetscFunctionBegin;
1046   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1047   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1048   a->vecinuse = 0;
1049   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1050   PetscCall(VecCUDAResetArray(a->cvec));
1051   if (v) *v = NULL;
1052   PetscFunctionReturn(0);
1053 }
1054 
/* MatDenseGetColumnVecRead_MPIDenseCUDA - like MatDenseGetColumnVec but for
   read-only access: the device array is fetched with read semantics and the
   returned cached vector is lock-pushed so callers cannot write through it.
   Must be paired with MatDenseRestoreColumnVecRead. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the cached column vector wrapping no array yet */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* non-zero marks the vector as checked out */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  /* column col starts at offset col*lda in the column-major device array */
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1074 
/* MatDenseRestoreColumnVecRead_MPIDenseCUDA - returns the read-only column
   vector: restores the device array, pops the read lock, detaches the cached
   vector, and clears the in-use flag. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1088 
/* MatDenseGetColumnVecWrite_MPIDenseCUDA - like MatDenseGetColumnVec but
   with write-only semantics for the underlying device array (existing values
   need not be valid). Must be paired with MatDenseRestoreColumnVecWrite. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the cached column vector wrapping no array yet */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* non-zero marks the vector as checked out */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major device array */
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1107 
/* MatDenseRestoreColumnVecWrite_MPIDenseCUDA - returns the write-only column
   vector: restores the device array with write semantics, detaches the
   cached vector, and clears the in-use flag. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1120 
1121 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1122   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1123 
1124   PetscFunctionBegin;
1125   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1126   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1127   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
1132   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1133 
1134   PetscFunctionBegin;
1135   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1136   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1137   PetscCall(MatDenseCUDAResetArray(l->A));
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1142   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1143 
1144   PetscFunctionBegin;
1145   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1146   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1147   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1152   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1153 
1154   PetscFunctionBegin;
1155   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1160   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1161 
1162   PetscFunctionBegin;
1163   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1168   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1176   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1184   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(MatDenseCUDAGetArray(l->A, a));
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1192   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1200 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1201 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1202 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1203 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1204 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1205 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1206 
/* MatBindToCPU_MPIDenseCUDA - binds the matrix to the CPU (bind=PETSC_TRUE)
   or lets it run on the GPU (bind=PETSC_FALSE).

   Besides forwarding the bind to the local block and any cached submatrix,
   this swaps the composed column-vector accessors between the CUDA and host
   implementations and switches the shift kernel accordingly. When moving to
   the GPU, cached helper objects of the wrong (host) type are destroyed so
   they get recreated with the CUDA type on next use. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached host-typed helper objects; CUDA ones are created lazily */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}
1241 
/* MatMPIDenseCUDASetPreallocation - allocates (or adopts) device storage for
   a MATMPIDENSECUDA matrix.

   d_data may be NULL, in which case the local SEQDENSECUDA block allocates
   its own device array; otherwise the caller-supplied device pointer is
   used. No-op for matrices that are not of type MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* local block is m x N: each rank stores all global columns of its rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1263 #endif
1264 
1265 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
1266   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1267 
1268   PetscFunctionBegin;
1269   PetscCall(MatSetRandom(d->A, rctx));
1270   PetscFunctionReturn(0);
1271 }
1272 
/* MatMissingDiagonal_MPIDense - a dense matrix stores every entry, so no
   diagonal entry can be structurally missing; *d is never written. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1278 
1279 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1280 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1281 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1282 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1283 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1284 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1285 
1286 /* -------------------------------------------------------------------*/
1287 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1288                                        MatGetRow_MPIDense,
1289                                        MatRestoreRow_MPIDense,
1290                                        MatMult_MPIDense,
1291                                        /*  4*/ MatMultAdd_MPIDense,
1292                                        MatMultTranspose_MPIDense,
1293                                        MatMultTransposeAdd_MPIDense,
1294                                        NULL,
1295                                        NULL,
1296                                        NULL,
1297                                        /* 10*/ NULL,
1298                                        NULL,
1299                                        NULL,
1300                                        NULL,
1301                                        MatTranspose_MPIDense,
1302                                        /* 15*/ MatGetInfo_MPIDense,
1303                                        MatEqual_MPIDense,
1304                                        MatGetDiagonal_MPIDense,
1305                                        MatDiagonalScale_MPIDense,
1306                                        MatNorm_MPIDense,
1307                                        /* 20*/ MatAssemblyBegin_MPIDense,
1308                                        MatAssemblyEnd_MPIDense,
1309                                        MatSetOption_MPIDense,
1310                                        MatZeroEntries_MPIDense,
1311                                        /* 24*/ MatZeroRows_MPIDense,
1312                                        NULL,
1313                                        NULL,
1314                                        NULL,
1315                                        NULL,
1316                                        /* 29*/ MatSetUp_MPIDense,
1317                                        NULL,
1318                                        NULL,
1319                                        MatGetDiagonalBlock_MPIDense,
1320                                        NULL,
1321                                        /* 34*/ MatDuplicate_MPIDense,
1322                                        NULL,
1323                                        NULL,
1324                                        NULL,
1325                                        NULL,
1326                                        /* 39*/ MatAXPY_MPIDense,
1327                                        MatCreateSubMatrices_MPIDense,
1328                                        NULL,
1329                                        MatGetValues_MPIDense,
1330                                        MatCopy_MPIDense,
1331                                        /* 44*/ NULL,
1332                                        MatScale_MPIDense,
1333                                        MatShift_MPIDense,
1334                                        NULL,
1335                                        NULL,
1336                                        /* 49*/ MatSetRandom_MPIDense,
1337                                        NULL,
1338                                        NULL,
1339                                        NULL,
1340                                        NULL,
1341                                        /* 54*/ NULL,
1342                                        NULL,
1343                                        NULL,
1344                                        NULL,
1345                                        NULL,
1346                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1347                                        MatDestroy_MPIDense,
1348                                        MatView_MPIDense,
1349                                        NULL,
1350                                        NULL,
1351                                        /* 64*/ NULL,
1352                                        NULL,
1353                                        NULL,
1354                                        NULL,
1355                                        NULL,
1356                                        /* 69*/ NULL,
1357                                        NULL,
1358                                        NULL,
1359                                        NULL,
1360                                        NULL,
1361                                        /* 74*/ NULL,
1362                                        NULL,
1363                                        NULL,
1364                                        NULL,
1365                                        NULL,
1366                                        /* 79*/ NULL,
1367                                        NULL,
1368                                        NULL,
1369                                        NULL,
1370                                        /* 83*/ MatLoad_MPIDense,
1371                                        NULL,
1372                                        NULL,
1373                                        NULL,
1374                                        NULL,
1375                                        NULL,
1376                                        /* 89*/ NULL,
1377                                        NULL,
1378                                        NULL,
1379                                        NULL,
1380                                        NULL,
1381                                        /* 94*/ NULL,
1382                                        NULL,
1383                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1384                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1385                                        NULL,
1386                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1387                                        NULL,
1388                                        NULL,
1389                                        MatConjugate_MPIDense,
1390                                        NULL,
1391                                        /*104*/ NULL,
1392                                        MatRealPart_MPIDense,
1393                                        MatImaginaryPart_MPIDense,
1394                                        NULL,
1395                                        NULL,
1396                                        /*109*/ NULL,
1397                                        NULL,
1398                                        NULL,
1399                                        MatGetColumnVector_MPIDense,
1400                                        MatMissingDiagonal_MPIDense,
1401                                        /*114*/ NULL,
1402                                        NULL,
1403                                        NULL,
1404                                        NULL,
1405                                        NULL,
1406                                        /*119*/ NULL,
1407                                        NULL,
1408                                        NULL,
1409                                        NULL,
1410                                        NULL,
1411                                        /*124*/ NULL,
1412                                        MatGetColumnReductions_MPIDense,
1413                                        NULL,
1414                                        NULL,
1415                                        NULL,
1416                                        /*129*/ NULL,
1417                                        NULL,
1418                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1419                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1420                                        NULL,
1421                                        /*134*/ NULL,
1422                                        NULL,
1423                                        NULL,
1424                                        NULL,
1425                                        NULL,
1426                                        /*139*/ NULL,
1427                                        NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        NULL,
1431                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1432                                        /*145*/ NULL,
1433                                        NULL,
1434                                        NULL,
1435                                        NULL,
1436                                        NULL,
1437                                        /*150*/ NULL};
1438 
1439 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
1440   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1441   PetscBool     iscuda;
1442 
1443   PetscFunctionBegin;
1444   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1445   PetscCall(PetscLayoutSetUp(mat->rmap));
1446   PetscCall(PetscLayoutSetUp(mat->cmap));
1447   if (!a->A) {
1448     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1449     PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
1450     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1451   }
1452   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1453   PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1454   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1455   mat->preallocated = PETSC_TRUE;
1456   mat->assembled    = PETSC_TRUE;
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1461   Mat B, C;
1462 
1463   PetscFunctionBegin;
1464   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1465   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1466   PetscCall(MatDestroy(&C));
1467   if (reuse == MAT_REUSE_MATRIX) {
1468     C = *newmat;
1469   } else C = NULL;
1470   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1471   PetscCall(MatDestroy(&B));
1472   if (reuse == MAT_INPLACE_MATRIX) {
1473     PetscCall(MatHeaderReplace(A, &C));
1474   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1479   Mat B, C;
1480 
1481   PetscFunctionBegin;
1482   PetscCall(MatDenseGetLocalMatrix(A, &C));
1483   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1484   if (reuse == MAT_REUSE_MATRIX) {
1485     C = *newmat;
1486   } else C = NULL;
1487   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1488   PetscCall(MatDestroy(&B));
1489   if (reuse == MAT_INPLACE_MATRIX) {
1490     PetscCall(MatHeaderReplace(A, &C));
1491   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #if defined(PETSC_HAVE_ELEMENTAL)
1496 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1497   Mat          mat_elemental;
1498   PetscScalar *v;
1499   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1500 
1501   PetscFunctionBegin;
1502   if (reuse == MAT_REUSE_MATRIX) {
1503     mat_elemental = *newmat;
1504     PetscCall(MatZeroEntries(*newmat));
1505   } else {
1506     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1507     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1508     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1509     PetscCall(MatSetUp(mat_elemental));
1510     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1511   }
1512 
1513   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1514   for (i = 0; i < N; i++) cols[i] = i;
1515   for (i = 0; i < m; i++) rows[i] = rstart + i;
1516 
1517   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1518   PetscCall(MatDenseGetArray(A, &v));
1519   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1520   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1521   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1522   PetscCall(MatDenseRestoreArray(A, &v));
1523   PetscCall(PetscFree2(rows, cols));
1524 
1525   if (reuse == MAT_INPLACE_MATRIX) {
1526     PetscCall(MatHeaderReplace(A, &mat_elemental));
1527   } else {
1528     *newmat = mat_elemental;
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 #endif
1533 
1534 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
1535   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1536 
1537   PetscFunctionBegin;
1538   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
1543   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1544 
1545   PetscFunctionBegin;
1546   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
1551   Mat_MPIDense *mat;
1552   PetscInt      m, nloc, N;
1553 
1554   PetscFunctionBegin;
1555   PetscCall(MatGetSize(inmat, &m, &N));
1556   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1557   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1558     PetscInt sum;
1559 
1560     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1561     /* Check sum(n) = N */
1562     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1563     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1564 
1565     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1566     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1567   }
1568 
1569   /* numeric phase */
1570   mat = (Mat_MPIDense *)(*outmat)->data;
1571   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 #if defined(PETSC_HAVE_CUDA)
/*
   Converts a MATMPIDENSECUDA matrix to plain MATMPIDENSE: copies/duplicates the data,
   then strips the CUDA-specific method table entries (composing NULL removes a
   previously composed function) and converts the inner sequential block back to CPU.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX: *newmat already aliases M, so B below is M itself */

  B = *newmat;
  /* ensure data lives on the host before the type change */
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  /* composing NULL un-registers the CUDA-only methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
  m = (Mat_MPIDense *)(B)->data;
  /* convert the inner sequential block to the host type as well */
  if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1612 
/*
   Converts a MATMPIDENSE matrix to MATMPIDENSECUDA: copies/duplicates the data, then
   registers the CUDA-specific method table entries and converts the inner sequential
   block to the CUDA type.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX: *newmat already aliases M, so B below is M itself */

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
  /* register the CUDA-only methods on this object */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    /* inner block exists and holds current host values: device copy will match */
    PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1654 #endif
1655 
1656 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1657   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1658   PetscInt      lda;
1659 
1660   PetscFunctionBegin;
1661   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1662   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1663   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1664   a->vecinuse = col + 1;
1665   PetscCall(MatDenseGetLDA(a->A, &lda));
1666   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1667   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1668   *v = a->cvec;
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1673   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1674 
1675   PetscFunctionBegin;
1676   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1677   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1678   a->vecinuse = 0;
1679   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1680   PetscCall(VecResetArray(a->cvec));
1681   if (v) *v = NULL;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1686   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1687   PetscInt      lda;
1688 
1689   PetscFunctionBegin;
1690   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1691   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1692   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1693   a->vecinuse = col + 1;
1694   PetscCall(MatDenseGetLDA(a->A, &lda));
1695   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1696   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1697   PetscCall(VecLockReadPush(a->cvec));
1698   *v = a->cvec;
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1703   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1704 
1705   PetscFunctionBegin;
1706   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1707   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1708   a->vecinuse = 0;
1709   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1710   PetscCall(VecLockReadPop(a->cvec));
1711   PetscCall(VecResetArray(a->cvec));
1712   if (v) *v = NULL;
1713   PetscFunctionReturn(0);
1714 }
1715 
1716 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1717   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1718   PetscInt      lda;
1719 
1720   PetscFunctionBegin;
1721   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1722   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1723   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1724   a->vecinuse = col + 1;
1725   PetscCall(MatDenseGetLDA(a->A, &lda));
1726   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1727   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1728   *v = a->cvec;
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1733   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1734 
1735   PetscFunctionBegin;
1736   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1737   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1738   a->vecinuse = 0;
1739   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1740   PetscCall(VecResetArray(a->cvec));
1741   if (v) *v = NULL;
1742   PetscFunctionReturn(0);
1743 }
1744 
/*
   Returns in *v a parallel submatrix view A[rbegin:rend, cbegin:cend) that shares
   storage with A; must be matched by MatDenseRestoreSubMatrix(). The cached view
   matrix a->cmat is created once and its layouts are rebuilt only when the requested
   shape changes.
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range [rbegin,rend) against this rank's row ownership */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first request: create the cached view matrix and set its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached view exists: rebuild layouts only if the requested shape differs */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* same global row count but not the full range: local splits must agree on every rank */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* wire the view's inner sequential block to a slice of A's inner block */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse           = cbegin + 1; /* store cbegin+1 so 0 can mean "not in use" */
  *v                    = a->cmat;
  PetscFunctionReturn(0);
}
1798 
1799 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
1800   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1801   Mat_MPIDense *c;
1802 
1803   PetscFunctionBegin;
1804   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1805   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1806   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1807   a->matinuse = 0;
1808   c           = (Mat_MPIDense *)a->cmat->data;
1809   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1810   if (v) *v = NULL;
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 /*MC
1815    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1816 
1817    Options Database Keys:
1818 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1819 
1820   Level: beginner
1821 
1822 .seealso: `MatCreateDense()`
1823 
1824 M*/
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE: allocates the type-specific
   data, installs the operation table, and registers all composed (dispatch-by-name)
   methods and conversion routines.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat, &a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* register the dense-matrix API implementations looked up by name via PetscUseMethod/PetscTryMethod */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversion routines to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* MatProduct dispatch for mixed dense/AIJ products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}
1888 
1889 /*MC
1890    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1891 
1892    Options Database Keys:
1893 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1894 
1895   Level: beginner
1896 
.seealso: `MATMPIDENSE`, `MATSEQDENSECUDA`, `MATDENSECUDA`
1898 
1899 M*/
1900 #if defined(PETSC_HAVE_CUDA)
1901 #include <petsc/private/deviceimpl.h>
/*
   MatCreate_MPIDenseCUDA - Type constructor for MATMPIDENSECUDA: initializes the CUDA
   device, builds a plain MPIDense matrix, then converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
1909 #endif
1910 
1911 /*MC
1912    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1913 
1914    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1915    and MATMPIDENSE otherwise.
1916 
1917    Options Database Keys:
1918 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1919 
1920   Level: beginner
1921 
1922 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1923 M*/
1924 
1925 /*MC
1926    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1927 
1928    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1929    and MATMPIDENSECUDA otherwise.
1930 
1931    Options Database Keys:
1932 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1933 
1934   Level: beginner
1935 
1936 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1937 M*/
1938 
1939 /*@C
1940    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1941 
1942    Collective
1943 
1944    Input Parameters:
+  B - the matrix
1946 -  data - optional location of matrix data.  Set data=NULL for PETSc
1947    to control all matrix memory allocation.
1948 
1949    Notes:
1950    The dense format is fully compatible with standard Fortran 77
1951    storage by columns.
1952 
1953    The data input variable is intended primarily for Fortran programmers
1954    who wish to allocate their own matrix memory space.  Most users should
1955    set data=NULL.
1956 
1957    Level: intermediate
1958 
1959 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1960 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatches to the type-specific implementation (e.g. MatMPIDenseSetPreallocation_MPIDense);
     PetscTryMethod is a no-op if B's type has not composed this method */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}
1967 
1968 /*@
1969    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1970    array provided by the user. This is useful to avoid copying an array
1971    into a matrix
1972 
1973    Not Collective
1974 
1975    Input Parameters:
1976 +  mat - the matrix
1977 -  array - the array in column major order
1978 
1979    Notes:
1980    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1981    freed when the matrix is destroyed.
1982 
1983    Level: developer
1984 
1985 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1986 
1987 @*/
1988 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
1989   PetscFunctionBegin;
1990   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1991   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1992   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1993 #if defined(PETSC_HAVE_CUDA)
1994   mat->offloadmask = PETSC_OFFLOAD_CPU;
1995 #endif
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 /*@
2000    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2001 
2002    Not Collective
2003 
2004    Input Parameters:
2005 .  mat - the matrix
2006 
2007    Notes:
2008    You can only call this after a call to MatDensePlaceArray()
2009 
2010    Level: developer
2011 
2012 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2013 
2014 @*/
2015 PetscErrorCode MatDenseResetArray(Mat mat) {
2016   PetscFunctionBegin;
2017   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2018   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2019   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 /*@
2024    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2025    array provided by the user. This is useful to avoid copying an array
2026    into a matrix
2027 
2028    Not Collective
2029 
2030    Input Parameters:
2031 +  mat - the matrix
2032 -  array - the array in column major order
2033 
2034    Notes:
2035    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2036    freed by the user. It will be freed when the matrix is destroyed.
2037 
2038    Level: developer
2039 
2040 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2041 @*/
2042 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
2043   PetscFunctionBegin;
2044   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2045   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2046   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2047 #if defined(PETSC_HAVE_CUDA)
2048   mat->offloadmask = PETSC_OFFLOAD_CPU;
2049 #endif
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array is a device pointer, so the GPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2081 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to what it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  /* the visible values revert to the original array, so the state must change */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2105 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array is a device pointer, so the GPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2134 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* caller is expected to write through the returned pointer, so bump the state now */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2160 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the GPU copy is considered the valid one.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* writes happened on the device, so the GPU copy is now the authoritative one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2184 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  /* read-only access: the object state is deliberately NOT increased */
  PetscFunctionReturn(0);
}
2209 
2210 /*@C
2211    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2212 
2213    Not Collective
2214 
2215    Input Parameters:
2216 +  A - the matrix
2217 -  array - the GPU array in column major order
2218 
2219    Notes:
2220    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2221 
2222    Level: developer
2223 
2224 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2225 @*/
2226 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) {
2227   PetscFunctionBegin;
2228   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2229   PetscFunctionReturn(0);
2230 }
2231 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* read-write access: assume the caller may modify the values */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2257 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the GPU copy is considered the valid one.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* modifications happened on the device, so the GPU copy is now the authoritative one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2281 #endif
2282 
/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation calls are issued; each dispatches via PetscTryMethod so
     presumably only the one matching the actual type (seq vs MPI) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
2326 
2327 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   data must be either NULL on every process or non-NULL on every process
   (this is checked collectively in debug builds).

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* all ranks must agree on whether a user-supplied buffer is used */
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
2361 #endif
2362 
/* Duplicate an MPI dense matrix: create a new matrix of the same type and layout,
   mirror A's flags, and duplicate the local sequential dense block (values copied
   only if cpvalues requests it). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  /* the duplicate starts out assembled and preallocated, mirroring A */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  /* duplicate the sequential dense matrix holding the locally owned rows */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}
2390 
/* Load an MPI dense matrix from a viewer; only binary and (when configured) HDF5
   viewers are supported, any other viewer type raises PETSC_ERR_SUP. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) {
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2415 
2416 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
2417   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2418   Mat           a, b;
2419 
2420   PetscFunctionBegin;
2421   a = matA->A;
2422   b = matB->A;
2423   PetscCall(MatEqual(a, b, flag));
2424   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
2429   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2430 
2431   PetscFunctionBegin;
2432   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2433   PetscCall(MatDestroy(&atb->atb));
2434   PetscCall(PetscFree(atb));
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
2439   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2440 
2441   PetscFunctionBegin;
2442   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2443   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2444   PetscCall(PetscFree(abt));
2445   PetscFunctionReturn(0);
2446 }
2447 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank forms the local product aseq^T * bseq, packs it by destination rank in
   column-major chunks, and an MPI_Reduce_scatter sums the per-rank contributions
   directly into the local rows of C. Buffers and counts come from the reuse
   context created in the symbolic phase. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq; reuse the cached matrix after the first call */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its rows of every column,
     so the reduce-scatter delivers contiguous per-rank chunks */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2491 
/* Symbolic phase of C = A^T * B for MPI dense matrices: size and type C, then
   allocate the reuse context (send buffer and reduce-scatter counts) consumed by
   the numeric phase. A and B must share the same row distribution. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires identical row ownership of A and B on every rank */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C: one full copy of the global product as the
     send buffer, and per-rank element counts for MPI_Reduce_scatter */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2527 
/* Symbolic phase of C = A * B^T for MPI dense matrices: size and type C, pick the
   algorithm ("allgatherv" or "cyclic") from the product options, and allocate the
   matching communication buffers in the reuse context. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = allgatherv, anything else = cyclic (see the numeric dispatcher) */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic point-to-point traffic from colliding with other messages */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — send buffer for local B^T, receive buffer for all of B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2578 
/* Numeric phase of C = A * B^T, cyclic algorithm: the (packed) local block of B is
   passed around the ranks in a ring using nonblocking sends/receives, and each step
   multiplies the local A block against the currently held B block with a single GEMM,
   writing into the column range of C owned by the block's origin rank. Memory per
   rank stays bounded by the largest local block of B instead of all of B. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if the local B block is already contiguous (lda == local rows) send it in place,
     otherwise pack it densely into the first ping-pong buffer */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, overlapped with the communication started above */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2660 
2661 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) {
2662   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2663   Mat_MatTransMultDense *abt;
2664   MPI_Comm               comm;
2665   PetscMPIInt            size;
2666   PetscScalar           *cv, *sendbuf, *recvbuf;
2667   const PetscScalar     *av, *bv;
2668   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2669   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2670   PetscBLASInt           cm, cn, ck, alda, clda;
2671 
2672   PetscFunctionBegin;
2673   MatCheckProduct(C, 3);
2674   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2675   abt = (Mat_MatTransMultDense *)C->product->data;
2676   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2677   PetscCallMPI(MPI_Comm_size(comm, &size));
2678   PetscCall(MatDenseGetArrayRead(a->A, &av));
2679   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2680   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2681   PetscCall(MatDenseGetLDA(a->A, &i));
2682   PetscCall(PetscBLASIntCast(i, &alda));
2683   PetscCall(MatDenseGetLDA(b->A, &blda));
2684   PetscCall(MatDenseGetLDA(c->A, &i));
2685   PetscCall(PetscBLASIntCast(i, &clda));
2686   /* copy transpose of B into buf[0] */
2687   bn      = B->rmap->n;
2688   sendbuf = abt->buf[0];
2689   recvbuf = abt->buf[1];
2690   for (k = 0, j = 0; j < bn; j++) {
2691     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2692   }
2693   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2694   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2695   PetscCall(PetscBLASIntCast(cK, &ck));
2696   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2697   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2698   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2699   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2700   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2701   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2702   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2703   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2708   Mat_MatTransMultDense *abt;
2709 
2710   PetscFunctionBegin;
2711   MatCheckProduct(C, 3);
2712   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2713   abt = (Mat_MatTransMultDense *)C->product->data;
2714   switch (abt->alg) {
2715   case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break;
2716   default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break;
2717   }
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) {
2722   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2723 
2724   PetscFunctionBegin;
2725   PetscCall(MatDestroy(&ab->Ce));
2726   PetscCall(MatDestroy(&ab->Ae));
2727   PetscCall(MatDestroy(&ab->Be));
2728   PetscCall(PetscFree(ab));
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 #if defined(PETSC_HAVE_ELEMENTAL)
2733 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2734   Mat_MatMultDense *ab;
2735 
2736   PetscFunctionBegin;
2737   MatCheckProduct(C, 3);
2738   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2739   ab = (Mat_MatMultDense *)C->product->data;
2740   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2741   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2742   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2743   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2744   PetscFunctionReturn(0);
2745 }
2746 
2747 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2748   Mat               Ae, Be, Ce;
2749   Mat_MatMultDense *ab;
2750 
2751   PetscFunctionBegin;
2752   MatCheckProduct(C, 4);
2753   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2754   /* check local size of A and B */
2755   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2756     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2757   }
2758 
2759   /* create elemental matrices Ae and Be */
2760   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2761   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2762   PetscCall(MatSetType(Ae, MATELEMENTAL));
2763   PetscCall(MatSetUp(Ae));
2764   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2765 
2766   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2767   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2768   PetscCall(MatSetType(Be, MATELEMENTAL));
2769   PetscCall(MatSetUp(Be));
2770   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2771 
2772   /* compute symbolic Ce = Ae*Be */
2773   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2774   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2775 
2776   /* setup C */
2777   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2778   PetscCall(MatSetType(C, MATDENSE));
2779   PetscCall(MatSetUp(C));
2780 
2781   /* create data structure for reuse Cdense */
2782   PetscCall(PetscNew(&ab));
2783   ab->Ae = Ae;
2784   ab->Be = Be;
2785   ab->Ce = Ce;
2786 
2787   C->product->data       = ab;
2788   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2789   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2790   PetscFunctionReturn(0);
2791 }
2792 #endif
2793 /* ----------------------------------------------- */
2794 #if defined(PETSC_HAVE_ELEMENTAL)
2795 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
2796   PetscFunctionBegin;
2797   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2798   C->ops->productsymbolic = MatProductSymbolic_AB;
2799   PetscFunctionReturn(0);
2800 }
2801 #endif
2802 
2803 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
2804   Mat_Product *product = C->product;
2805   Mat          A = product->A, B = product->B;
2806 
2807   PetscFunctionBegin;
2808   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2809     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2810   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2811   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2812   PetscFunctionReturn(0);
2813 }
2814 
/* Select the algorithm and install the symbolic callbacks for the C = A*B^T
   product of two MPIDENSE matrices. The algorithm ("allgatherv" or "cyclic")
   defaults to allgatherv and may be overridden from the options database;
   the option name depends on whether the product was requested through the
   user-level API (MatMatTransposeMult) or the MatProduct interface. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name differs between the user-level API
     and the MatProduct interface */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  /* flg is set only when the user supplied the option explicitly */
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2843 
2844 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
2845   Mat_Product *product = C->product;
2846 
2847   PetscFunctionBegin;
2848   switch (product->type) {
2849 #if defined(PETSC_HAVE_ELEMENTAL)
2850   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
2851 #endif
2852   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
2853   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
2854   default: break;
2855   }
2856   PetscFunctionReturn(0);
2857 }
2858