xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cf906205f1df435953e2a75894dc7795d3ff0aed)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
12               matrix that represents the operator. For sequential matrices it returns itself.
13 
14     Input Parameter:
15 .      A - the sequential or MPI dense matrix
16 
17     Output Parameter:
18 .      B - the inner matrix
19 
20     Level: intermediate
21 
22 .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
25   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
26   PetscBool     flg;
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
30   PetscValidPointer(B, 2);
31   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
32   if (flg) *B = mat->A;
33   else {
34     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
35     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
42   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
43   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
44 
45   PetscFunctionBegin;
46   PetscCall(MatCopy(Amat->A, Bmat->A, s));
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
51   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
52   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
53   PetscScalar  *v;
54 
55   PetscFunctionBegin;
56   PetscCall(MatDenseGetArray(mat->A, &v));
57   PetscCall(MatDenseGetLDA(mat->A, &lda));
58   rend2 = PetscMin(rend, A->cmap->N);
59   if (rend2 > rstart) {
60     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
61     PetscCall(PetscLogFlops(rend2 - rstart));
62   }
63   PetscCall(MatDenseRestoreArray(mat->A, &v));
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
68   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
69   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
70 
71   PetscFunctionBegin;
72   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
73   lrow = row - rstart;
74   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
79   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
80   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
81 
82   PetscFunctionBegin;
83   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
84   lrow = row - rstart;
85   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
86   PetscFunctionReturn(0);
87 }
88 
/* Return (in *a) a sequential dense matrix whose entries alias the diagonal block of A.
   Requires congruent row/column layouts.  The block is created once and cached on A
   under the key "DiagonalBlock"; subsequent calls return the cached matrix. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B shares storage with the local block: columns rstart..rstart+m-1 start m*rstart
       scalars into the local array.  NOTE(review): this offset assumes the local block's
       leading dimension equals m -- confirm before relying on it with a user-set LDA. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing on A takes a reference; drop ours so the cache solely owns B */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
118 
/* Insert or add a logically dense block of values.  Locally owned rows go straight
   into the sequential block; off-process rows are queued in the stash for
   communication at assembly time (unless stashing is disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index within the sequential block */
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: entry (i,j) lives at v[i + j*m]; insert one value at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently skipped */
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix as needing assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
150 
151 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
152   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
153   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
154 
155   PetscFunctionBegin;
156   for (i = 0; i < m; i++) {
157     if (idxm[i] < 0) continue; /* negative row */
158     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
159     if (idxm[i] >= rstart && idxm[i] < rend) {
160       row = idxm[i] - rstart;
161       for (j = 0; j < n; j++) {
162         if (idxn[j] < 0) continue; /* negative column */
163         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
164         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
165       }
166     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
172   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
173 
174   PetscFunctionBegin;
175   PetscCall(MatDenseGetLDA(a->A, lda));
176   PetscFunctionReturn(0);
177 }
178 
/* Set the leading dimension of the local dense storage.  If the local block does
   not exist yet it is created first (matching the CUDA sequential type when A is
   MATMPIDENSECUDA) so that the LDA can be applied before preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A));
    /* the local block is (local rows) x (global columns) */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}
197 
198 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
199   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
200 
201   PetscFunctionBegin;
202   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
203   PetscCall(MatDenseGetArray(a->A, array));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
208   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
209 
210   PetscFunctionBegin;
211   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
212   PetscCall(MatDenseGetArrayRead(a->A, array));
213   PetscFunctionReturn(0);
214 }
215 
216 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
217   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
218 
219   PetscFunctionBegin;
220   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
221   PetscCall(MatDenseGetArrayWrite(a->A, array));
222   PetscFunctionReturn(0);
223 }
224 
225 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
226   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
227 
228   PetscFunctionBegin;
229   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
230   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
231   PetscCall(MatDensePlaceArray(a->A, array));
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
236   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
237 
238   PetscFunctionBegin;
239   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
240   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
241   PetscCall(MatDenseResetArray(a->A));
242   PetscFunctionReturn(0);
243 }
244 
245 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
246   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
247 
248   PetscFunctionBegin;
249   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
250   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
251   PetscCall(MatDenseReplaceArray(a->A, array));
252   PetscFunctionReturn(0);
253 }
254 
/* Extract the submatrix A(isrow, iscol) into *B.  Row indices must be locally owned;
   the column index set is gathered onto every rank so each rank can copy all
   requested columns of its own rows. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local)); /* every rank needs the full column list */
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced contiguously, which assumes the destination block's
     leading dimension equals nrows -- confirm for reused matrices with a custom LDA. */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
320 
321 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
322   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
323 
324   PetscFunctionBegin;
325   PetscCall(MatDenseRestoreArray(a->A, array));
326   PetscFunctionReturn(0);
327 }
328 
329 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
330   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
331 
332   PetscFunctionBegin;
333   PetscCall(MatDenseRestoreArrayRead(a->A, array));
334   PetscFunctionReturn(0);
335 }
336 
337 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
338   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
339 
340   PetscFunctionBegin;
341   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
342   PetscFunctionReturn(0);
343 }
344 
345 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
346   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
347   PetscInt      nstash, reallocs;
348 
349   PetscFunctionBegin;
350   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
351 
352   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
353   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
354   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
359   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
360   PetscInt      i, *row, *col, flg, j, rstart, ncols;
361   PetscMPIInt   n;
362   PetscScalar  *val;
363 
364   PetscFunctionBegin;
365   if (!mdn->donotstash && !mat->nooffprocentries) {
366     /*  wait on receives */
367     while (1) {
368       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
369       if (!flg) break;
370 
371       for (i = 0; i < n;) {
372         /* Now identify the consecutive vals belonging to the same row */
373         for (j = i, rstart = row[j]; j < n; j++) {
374           if (row[j] != rstart) break;
375         }
376         if (j < n) ncols = j - i;
377         else ncols = n - i;
378         /* Now assemble all these values with a single function call */
379         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
380         i = j;
381       }
382     }
383     PetscCall(MatStashScatterEnd_Private(&mat->stash));
384   }
385 
386   PetscCall(MatAssemblyBegin(mdn->A, mode));
387   PetscCall(MatAssemblyEnd(mdn->A, mode));
388   PetscFunctionReturn(0);
389 }
390 
391 PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
392   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
393 
394   PetscFunctionBegin;
395   PetscCall(MatZeroEntries(l->A));
396   PetscFunctionReturn(0);
397 }
398 
/* Zero the listed (global) rows of A.  If x and b are given, fix b so the system
   stays consistent (b[r] = diag * x[r] for each zeroed local row), and place `diag`
   on the diagonal of the zeroed rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows (global indices mapped to local numbering) */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the local rows with a zero diagonal first ... */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    /* ... then set the requested diagonal value; MatDiagonalSet is collective */
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
429 
430 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
431 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
432 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
433 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
434 
/* y = A x.  Broadcast the needed entries of x into the local work vector lvec with
   the star-forest communicator, then apply the local sequential block. */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* build lvec + SF lazily */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host/device arrays are handled correctly */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* local multiply: yy = (local block) * lvec */
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}
452 
/* z = y + A x.  Same communication pattern as MatMult_MPIDense, but the local
   kernel is the multiply-add variant. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* build lvec + SF lazily */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast of xx into the local work vector */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* local multiply-add: zz = yy + (local block) * lvec */
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}
470 
/* y = A^T x.  Apply the local transpose into lvec, then sum contributions from all
   ranks into y with a star-forest reduction. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* build lvec + SF lazily */
  PetscCall(VecSet(yy, 0.0));                             /* reduction adds into yy */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  /* memtype-aware reduction: sum each rank's partial result into yy */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}
489 
/* z = y + A^T x.  Seed z with y, apply the local transpose into lvec, then sum all
   ranks' contributions into z with a star-forest reduction. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* build lvec + SF lazily */
  PetscCall(VecCopy(yy, zz));                             /* reduction adds on top of yy */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  /* memtype-aware reduction: sum each rank's partial result into zz */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}
508 
509 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
510   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
511   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
512   PetscScalar       *x, zero = 0.0;
513   const PetscScalar *av;
514 
515   PetscFunctionBegin;
516   PetscCall(VecSet(v, zero));
517   PetscCall(VecGetArray(v, &x));
518   PetscCall(VecGetSize(v, &n));
519   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
520   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
521   radd = A->rmap->rstart * m;
522   PetscCall(MatDenseGetArrayRead(a->A, &av));
523   PetscCall(MatDenseGetLDA(a->A, &lda));
524   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
525   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
526   PetscCall(VecRestoreArray(v, &x));
527   PetscFunctionReturn(0);
528 }
529 
/* Destroy an MPI dense matrix: release the local block, communication objects, and
   cached column-vector/sub-matrix helpers, then unregister every composed method
   so no dangling function pointers survive a type change. */
PetscErrorCode MatDestroy_MPIDense(Mat mat) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* refuse to destroy while a column-vector or sub-matrix view is outstanding */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* free the local block, the multiply work vector/SF, and cached helpers */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* unregister every method composed on this object by the MPIDENSE type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block, if MatGetDiagonalBlock_MPIDense created one */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}
604 
605 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
606 
607 #include <petscdraw.h>
/* View helper for ASCII, draw, and socket viewers.  Info formats are handled
   per-rank; all other formats gather the whole matrix onto rank 0 and view it
   there through a sub-viewer. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    /* info formats print per-rank statistics and return early */
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    /* nothing to do for a null draw context */
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N)); /* rank 0 owns everything */
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* view rank 0's local block (the whole matrix) through a self sub-viewer */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
689 
690 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
691   PetscBool iascii, isbinary, isdraw, issocket;
692 
693   PetscFunctionBegin;
694   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
695   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
696   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
697   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
698 
699   if (iascii || issocket || isdraw) {
700     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
701   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
702   PetscFunctionReturn(0);
703 }
704 
705 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
706   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
707   Mat            mdn = mat->A;
708   PetscLogDouble isend[5], irecv[5];
709 
710   PetscFunctionBegin;
711   info->block_size = 1.0;
712 
713   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
714 
715   isend[0] = info->nz_used;
716   isend[1] = info->nz_allocated;
717   isend[2] = info->nz_unneeded;
718   isend[3] = info->memory;
719   isend[4] = info->mallocs;
720   if (flag == MAT_LOCAL) {
721     info->nz_used      = isend[0];
722     info->nz_allocated = isend[1];
723     info->nz_unneeded  = isend[2];
724     info->memory       = isend[3];
725     info->mallocs      = isend[4];
726   } else if (flag == MAT_GLOBAL_MAX) {
727     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
728 
729     info->nz_used      = irecv[0];
730     info->nz_allocated = irecv[1];
731     info->nz_unneeded  = irecv[2];
732     info->memory       = irecv[3];
733     info->mallocs      = irecv[4];
734   } else if (flag == MAT_GLOBAL_SUM) {
735     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
736 
737     info->nz_used      = irecv[0];
738     info->nz_allocated = irecv[1];
739     info->nz_unneeded  = irecv[2];
740     info->memory       = irecv[3];
741     info->mallocs      = irecv[4];
742   }
743   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
744   info->fill_ratio_needed = 0;
745   info->factor_mallocs    = 0;
746   PetscFunctionReturn(0);
747 }
748 
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to the local sequential block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* remembered here as well, for MatSetValues() on the parallel wrapper */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* options with no meaning for a dense storage format */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  /* suppresses communication of off-process values during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
786 
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  /* Compute A <- diag(ll) * A * diag(rr); either vector may be NULL to skip that side. */
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    /* Left scaling: row i of the local block is multiplied by the locally owned entry ll[i]. */
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk along row i: consecutive columns are lda apart (column-major storage) */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    /* Right scaling: each rank stores all n columns of its row block, so the
       distributed entries of rr are first gathered into the sequential lvec. */
    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the star forest used for the gather */
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous in memory */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}
834 
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single rank: delegate entirely to the sequential implementation */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block, then a global reduction.
         NOTE(review): this walk treats the local array as m*n contiguous entries,
         i.e. it assumes lda equals the local row count — confirm for matrices
         created with a larger leading dimension. */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max column sum: accumulate local column sums, reduce, then take the max.
         Same contiguous-storage assumption as the Frobenius branch above. */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so the local row norms can simply be maxed */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}
883 
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* build a fresh N x M matrix with swapped local sizes; for INPLACE it is
       merged back into A at the end */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of this rank's rows; each local column j
     becomes (part of) global row j of the transpose, inserted via MatSetValues
     so off-process destinations are stashed and communicated during assembly */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda; /* advance to the next column of the (column-major) local block */
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B's and destroy B's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
921 
922 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
923 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
924 
925 PetscErrorCode MatSetUp_MPIDense(Mat A) {
926   PetscFunctionBegin;
927   PetscCall(PetscLayoutSetUp(A->rmap));
928   PetscCall(PetscLayoutSetUp(A->cmap));
929   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
934   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
935 
936   PetscFunctionBegin;
937   PetscCall(MatAXPY(A->A, alpha, B->A, str));
938   PetscFunctionReturn(0);
939 }
940 
941 PetscErrorCode MatConjugate_MPIDense(Mat mat) {
942   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
943 
944   PetscFunctionBegin;
945   PetscCall(MatConjugate(a->A));
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatRealPart_MPIDense(Mat A) {
950   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
951 
952   PetscFunctionBegin;
953   PetscCall(MatRealPart(a->A));
954   PetscFunctionReturn(0);
955 }
956 
957 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
958   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959 
960   PetscFunctionBegin;
961   PetscCall(MatImaginaryPart(a->A));
962   PetscFunctionReturn(0);
963 }
964 
965 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
966   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
967 
968   PetscFunctionBegin;
969   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
970   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
971   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
972   PetscFunctionReturn(0);
973 }
974 
975 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
976 
977 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
978   PetscInt      i, m, n;
979   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
980   PetscReal    *work;
981 
982   PetscFunctionBegin;
983   PetscCall(MatGetSize(A, &m, &n));
984   PetscCall(PetscMalloc1(n, &work));
985   if (type == REDUCTION_MEAN_REALPART) {
986     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
987   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
988     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
989   } else {
990     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
991   }
992   if (type == NORM_2) {
993     for (i = 0; i < n; i++) work[i] *= work[i];
994   }
995   if (type == NORM_INFINITY) {
996     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
997   } else {
998     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
999   }
1000   PetscCall(PetscFree(work));
1001   if (type == NORM_2) {
1002     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1003   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1004     for (i = 0; i < n; i++) reductions[i] /= m;
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #if defined(PETSC_HAVE_CUDA)
1010 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
1011   PetscScalar *da;
1012   PetscInt     lda;
1013 
1014   PetscFunctionBegin;
1015   PetscCall(MatDenseCUDAGetArray(A, &da));
1016   PetscCall(MatDenseGetLDA(A, &lda));
1017   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1018   PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1019   PetscCall(MatDenseCUDARestoreArray(A, &da));
1020   PetscFunctionReturn(0);
1021 }
1022 
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  /* Hand out column `col` as a CUDA vector aliasing the matrix storage (read/write).
     Only one column vector or submatrix may be outstanding at a time. */
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; it owns no array of its own */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the wrapper at the start of column `col` within the device array */
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1041 
1042 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
1043   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1044 
1045   PetscFunctionBegin;
1046   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1047   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1048   a->vecinuse = 0;
1049   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1050   PetscCall(VecCUDAResetArray(a->cvec));
1051   if (v) *v = NULL;
1052   PetscFunctionReturn(0);
1053 }
1054 
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  /* Read-only variant of MatDenseGetColumnVec: same aliasing scheme, but the
     device array is acquired read-only and the vector is read-locked. */
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1074 
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  /* Returns the column vector handed out by the read-only getter above. */
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  /* release the read-only device array, then drop the lock and detach the alias */
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1088 
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  /* Write-only variant of MatDenseGetColumnVec: the device array is acquired
     for writing (prior contents need not be valid). */
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1107 
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  /* Returns the column vector handed out by the write-only getter above. */
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  /* release the write-access device array and detach it from the wrapper vector */
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1120 
1121 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1122   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1123 
1124   PetscFunctionBegin;
1125   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1126   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1127   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
1132   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1133 
1134   PetscFunctionBegin;
1135   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1136   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1137   PetscCall(MatDenseCUDAResetArray(l->A));
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
1142   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1143 
1144   PetscFunctionBegin;
1145   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1146   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1147   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1152   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1153 
1154   PetscFunctionBegin;
1155   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
1160   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1161 
1162   PetscFunctionBegin;
1163   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1168   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
1176   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1184   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(MatDenseCUDAGetArray(l->A, a));
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
1192   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1200 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1201 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1202 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1203 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1204 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1205 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1206 
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  /* Switch the matrix between CPU-only and GPU execution: bind/unbind the local
     block and swap the composed column-vector accessors (and the shift op)
     between the CUDA and host implementations. */
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* going to the GPU: use CURAND for random numbers */
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    /* cached helper vector/matrix of the wrong (host) type must be rebuilt */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    /* binding to the CPU: restore host random numbers and host accessors */
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  /* propagate the binding to the cached column-range submatrix, if any */
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}
1245 
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  /* Allocate (or adopt, when d_data != NULL, which is presumably a device pointer
     — confirm with MatSeqDenseCUDASetPreallocation) the storage of the local block. */
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0); /* silently ignore for non-CUDA types */
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* each rank stores its full row block: rmap->n rows by all cmap->N columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1267 #endif
1268 
1269 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
1270   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1271 
1272   PetscFunctionBegin;
1273   PetscCall(MatSetRandom(d->A, rctx));
1274 #if defined(PETSC_HAVE_DEVICE)
1275   x->offloadmask = d->A->offloadmask;
1276 #endif
1277   PetscFunctionReturn(0);
1278 }
1279 
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  /* A dense matrix stores every entry explicitly, so no diagonal entry can be
     missing; the index output `d` is intentionally left untouched. */
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1285 
1286 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1287 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1288 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1289 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1290 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1291 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1292 
1293 /* -------------------------------------------------------------------*/
1294 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1295                                        MatGetRow_MPIDense,
1296                                        MatRestoreRow_MPIDense,
1297                                        MatMult_MPIDense,
1298                                        /*  4*/ MatMultAdd_MPIDense,
1299                                        MatMultTranspose_MPIDense,
1300                                        MatMultTransposeAdd_MPIDense,
1301                                        NULL,
1302                                        NULL,
1303                                        NULL,
1304                                        /* 10*/ NULL,
1305                                        NULL,
1306                                        NULL,
1307                                        NULL,
1308                                        MatTranspose_MPIDense,
1309                                        /* 15*/ MatGetInfo_MPIDense,
1310                                        MatEqual_MPIDense,
1311                                        MatGetDiagonal_MPIDense,
1312                                        MatDiagonalScale_MPIDense,
1313                                        MatNorm_MPIDense,
1314                                        /* 20*/ MatAssemblyBegin_MPIDense,
1315                                        MatAssemblyEnd_MPIDense,
1316                                        MatSetOption_MPIDense,
1317                                        MatZeroEntries_MPIDense,
1318                                        /* 24*/ MatZeroRows_MPIDense,
1319                                        NULL,
1320                                        NULL,
1321                                        NULL,
1322                                        NULL,
1323                                        /* 29*/ MatSetUp_MPIDense,
1324                                        NULL,
1325                                        NULL,
1326                                        MatGetDiagonalBlock_MPIDense,
1327                                        NULL,
1328                                        /* 34*/ MatDuplicate_MPIDense,
1329                                        NULL,
1330                                        NULL,
1331                                        NULL,
1332                                        NULL,
1333                                        /* 39*/ MatAXPY_MPIDense,
1334                                        MatCreateSubMatrices_MPIDense,
1335                                        NULL,
1336                                        MatGetValues_MPIDense,
1337                                        MatCopy_MPIDense,
1338                                        /* 44*/ NULL,
1339                                        MatScale_MPIDense,
1340                                        MatShift_MPIDense,
1341                                        NULL,
1342                                        NULL,
1343                                        /* 49*/ MatSetRandom_MPIDense,
1344                                        NULL,
1345                                        NULL,
1346                                        NULL,
1347                                        NULL,
1348                                        /* 54*/ NULL,
1349                                        NULL,
1350                                        NULL,
1351                                        NULL,
1352                                        NULL,
1353                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1354                                        MatDestroy_MPIDense,
1355                                        MatView_MPIDense,
1356                                        NULL,
1357                                        NULL,
1358                                        /* 64*/ NULL,
1359                                        NULL,
1360                                        NULL,
1361                                        NULL,
1362                                        NULL,
1363                                        /* 69*/ NULL,
1364                                        NULL,
1365                                        NULL,
1366                                        NULL,
1367                                        NULL,
1368                                        /* 74*/ NULL,
1369                                        NULL,
1370                                        NULL,
1371                                        NULL,
1372                                        NULL,
1373                                        /* 79*/ NULL,
1374                                        NULL,
1375                                        NULL,
1376                                        NULL,
1377                                        /* 83*/ MatLoad_MPIDense,
1378                                        NULL,
1379                                        NULL,
1380                                        NULL,
1381                                        NULL,
1382                                        NULL,
1383                                        /* 89*/ NULL,
1384                                        NULL,
1385                                        NULL,
1386                                        NULL,
1387                                        NULL,
1388                                        /* 94*/ NULL,
1389                                        NULL,
1390                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1391                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1392                                        NULL,
1393                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1394                                        NULL,
1395                                        NULL,
1396                                        MatConjugate_MPIDense,
1397                                        NULL,
1398                                        /*104*/ NULL,
1399                                        MatRealPart_MPIDense,
1400                                        MatImaginaryPart_MPIDense,
1401                                        NULL,
1402                                        NULL,
1403                                        /*109*/ NULL,
1404                                        NULL,
1405                                        NULL,
1406                                        MatGetColumnVector_MPIDense,
1407                                        MatMissingDiagonal_MPIDense,
1408                                        /*114*/ NULL,
1409                                        NULL,
1410                                        NULL,
1411                                        NULL,
1412                                        NULL,
1413                                        /*119*/ NULL,
1414                                        NULL,
1415                                        NULL,
1416                                        NULL,
1417                                        NULL,
1418                                        /*124*/ NULL,
1419                                        MatGetColumnReductions_MPIDense,
1420                                        NULL,
1421                                        NULL,
1422                                        NULL,
1423                                        /*129*/ NULL,
1424                                        NULL,
1425                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1426                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1427                                        NULL,
1428                                        /*134*/ NULL,
1429                                        NULL,
1430                                        NULL,
1431                                        NULL,
1432                                        NULL,
1433                                        /*139*/ NULL,
1434                                        NULL,
1435                                        NULL,
1436                                        NULL,
1437                                        NULL,
1438                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1439                                        /*145*/ NULL,
1440                                        NULL,
1441                                        NULL,
1442                                        NULL,
1443                                        NULL,
1444                                        /*150*/ NULL};
1445 
1446 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
1447   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1448   PetscBool     iscuda;
1449 
1450   PetscFunctionBegin;
1451   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1452   PetscCall(PetscLayoutSetUp(mat->rmap));
1453   PetscCall(PetscLayoutSetUp(mat->cmap));
1454   if (!a->A) {
1455     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1456     PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
1457     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1458   }
1459   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1460   PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1461   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1462 #if defined(PETSC_HAVE_DEVICE)
1463   mat->offloadmask = a->A->offloadmask;
1464 #endif
1465   mat->preallocated = PETSC_TRUE;
1466   mat->assembled    = PETSC_TRUE;
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1471   Mat B, C;
1472 
1473   PetscFunctionBegin;
1474   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1475   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1476   PetscCall(MatDestroy(&C));
1477   if (reuse == MAT_REUSE_MATRIX) {
1478     C = *newmat;
1479   } else C = NULL;
1480   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1481   PetscCall(MatDestroy(&B));
1482   if (reuse == MAT_INPLACE_MATRIX) {
1483     PetscCall(MatHeaderReplace(A, &C));
1484   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1489   Mat B, C;
1490 
1491   PetscFunctionBegin;
1492   PetscCall(MatDenseGetLocalMatrix(A, &C));
1493   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1494   if (reuse == MAT_REUSE_MATRIX) {
1495     C = *newmat;
1496   } else C = NULL;
1497   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1498   PetscCall(MatDestroy(&B));
1499   if (reuse == MAT_INPLACE_MATRIX) {
1500     PetscCall(MatHeaderReplace(A, &C));
1501   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1502   PetscFunctionReturn(0);
1503 }
1504 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   Convert a MATMPIDENSE matrix to MATELEMENTAL by inserting every locally
   owned row (over the full column width) into a new or reused Elemental
   matrix and assembling.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse: clear existing entries since values are inserted with ADD_VALUES below */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* values from MatDenseGetArray() are column major, so disable row orientation */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global indices: all columns, plus this process's contiguous row range */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1543 
1544 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
1545   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1546 
1547   PetscFunctionBegin;
1548   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
1553   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1554 
1555   PetscFunctionBegin;
1556   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
1561   Mat_MPIDense *mat;
1562   PetscInt      m, nloc, N;
1563 
1564   PetscFunctionBegin;
1565   PetscCall(MatGetSize(inmat, &m, &N));
1566   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1567   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1568     PetscInt sum;
1569 
1570     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1571     /* Check sum(n) = N */
1572     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1573     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1574 
1575     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1576     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1577   }
1578 
1579   /* numeric phase */
1580   mat = (Mat_MPIDense *)(*outmat)->data;
1581   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #if defined(PETSC_HAVE_CUDA)
/*
   Convert a MATMPIDENSECUDA matrix to plain MATMPIDENSE: bind it to the CPU,
   switch the default vector type back to host vectors, unregister the
   CUDA-specific composed methods, and convert the inner sequential matrix.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX: *newmat already aliases M, convert it in place */

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  /* remove all CUDA-only composed methods registered by the CUDA type */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
  /* convert the inner sequential matrix back to the host type as well */
  m = (Mat_MPIDense *)(B)->data;
  if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1622 
/*
   Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: switch the default vector
   type to CUDA vectors, register the CUDA-specific composed methods, convert
   the inner sequential matrix, and unbind from the CPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX: *newmat already aliases M, convert it in place */

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
  /* register CUDA-only composed methods (queried by name elsewhere) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    /* inner matrix exists: convert it to the CUDA type; values now valid on both host and device */
    PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1664 #endif
1665 
1666 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1667   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1668   PetscInt      lda;
1669 
1670   PetscFunctionBegin;
1671   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1672   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1673   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1674   a->vecinuse = col + 1;
1675   PetscCall(MatDenseGetLDA(a->A, &lda));
1676   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1677   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1678   *v = a->cvec;
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
1683   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1684 
1685   PetscFunctionBegin;
1686   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1687   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1688   a->vecinuse = 0;
1689   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1690   PetscCall(VecResetArray(a->cvec));
1691   if (v) *v = NULL;
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1696   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1697   PetscInt      lda;
1698 
1699   PetscFunctionBegin;
1700   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1701   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1702   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1703   a->vecinuse = col + 1;
1704   PetscCall(MatDenseGetLDA(a->A, &lda));
1705   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1706   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1707   PetscCall(VecLockReadPush(a->cvec));
1708   *v = a->cvec;
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
1713   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1714 
1715   PetscFunctionBegin;
1716   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1717   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1718   a->vecinuse = 0;
1719   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1720   PetscCall(VecLockReadPop(a->cvec));
1721   PetscCall(VecResetArray(a->cvec));
1722   if (v) *v = NULL;
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1727   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1728   PetscInt      lda;
1729 
1730   PetscFunctionBegin;
1731   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1732   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1733   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1734   a->vecinuse = col + 1;
1735   PetscCall(MatDenseGetLDA(a->A, &lda));
1736   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1737   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1738   *v = a->cvec;
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
1743   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1747   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1748   a->vecinuse = 0;
1749   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1750   PetscCall(VecResetArray(a->cvec));
1751   if (v) *v = NULL;
1752   PetscFunctionReturn(0);
1753 }
1754 
/*
   Return a view (no copy) of the submatrix A[rbegin:rend, cbegin:cend] as a
   parallel dense matrix.  The view object a->cmat is cached and its layouts
   are rebuilt only when the requested extents change.  Must be matched by
   MatDenseRestoreSubMatrix_MPIDense().
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the global row range [rbegin,rend) to this process's owned rows,
     expressed relative to the local block */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached view matrix and set its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse: rebuild the cached layouts only if the extents changed;
       the local-size comparison must agree on all ranks, hence the reduction */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  /* install the local sequential view into the cached parallel view */
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored 1-based so 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(0);
}
1812 
1813 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
1814   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1815   Mat_MPIDense *c;
1816 
1817   PetscFunctionBegin;
1818   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1819   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1820   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1821   a->matinuse = 0;
1822   c           = (Mat_MPIDense *)a->cmat->data;
1823   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1824   if (v) *v = NULL;
1825 #if defined(PETSC_HAVE_DEVICE)
1826   A->offloadmask = a->A->offloadmask;
1827 #endif
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 /*MC
1832    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1833 
1834    Options Database Keys:
1835 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1836 
1837   Level: beginner
1838 
1839 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1840 M*/
/*
   Type constructor for MATMPIDENSE: allocates the implementation struct,
   installs the function table, creates the stash for off-process entries,
   and registers all composed methods queried by name elsewhere in PETSc.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat, &a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column-vector/submatrix aliasing methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* MatProduct dispatch for mixed AIJ/dense products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}
1904 
1905 /*MC
1906    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1907 
1908    Options Database Keys:
1909 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
1910 
1911   Level: beginner
1912 
1913 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1914 M*/
1915 #if defined(PETSC_HAVE_CUDA)
1916 #include <petsc/private/deviceimpl.h>
/*
   Type constructor for MATMPIDENSECUDA: initializes the CUDA device, builds
   a plain MATMPIDENSE, then converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
1924 #endif
1925 
1926 /*MC
1927    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1928 
1929    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1930    and `MATMPIDENSE` otherwise.
1931 
1932    Options Database Keys:
1933 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1934 
1935   Level: beginner
1936 
1937 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1938 M*/
1939 
1940 /*MC
1941    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1942 
1943    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
1944    and `MATMPIDENSECUDA` otherwise.
1945 
1946    Options Database Keys:
1947 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
1948 
1949   Level: beginner
1950 
1951 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1952 M*/
1953 
1954 /*@C
1955    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1956 
1957    Collective
1958 
1959    Input Parameters:
+  B - the matrix
1961 -  data - optional location of matrix data.  Set data=NULL for PETSc
1962    to control all matrix memory allocation.
1963 
1964    Notes:
1965    The dense format is fully compatible with standard Fortran 77
1966    storage by columns.
1967 
1968    The data input variable is intended primarily for Fortran programmers
1969    who wish to allocate their own matrix memory space.  Most users should
1970    set data=NULL.
1971 
1972    Level: intermediate
1973 
1974 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1975 @*/
1976 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
1977   PetscFunctionBegin;
1978   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1979   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1980   PetscFunctionReturn(0);
1981 }
1982 
/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* the matrix values have effectively changed, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the placed array is host memory, so the CPU copy is now the authoritative one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2013 
/*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* the visible values revert to the original array: record the change of state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2036 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* unlike MatDensePlaceArray(), the replacement is permanent: ownership transfers to the matrix */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the new array is host memory, so the CPU copy is now the authoritative one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2066 
2067 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array lives on the device, so the GPU copy is now the authoritative one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2095 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDenseCUDAPlaceArray()`

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  /* the visible values revert to the original array: record the change of state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2118 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* unlike MatDenseCUDAPlaceArray(), this transfer of ownership is permanent */
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the new array lives on the device, so the GPU copy is now the authoritative one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2147 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* caller is granted write access, so the object state changes */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2175 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the caller wrote on the device, so the GPU now holds the authoritative data */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2197 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  /* no PetscObjectStateIncrease(): read-only access cannot modify the matrix */
  PetscFunctionReturn(0);
}
2222 
2223 /*@C
2224    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2225 
2226    Not Collective
2227 
2228    Input Parameters:
2229 +  A - the matrix
2230 -  array - the GPU array in column major order
2231 
2232    Note:
2233    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2234 
2235    Level: developer
2236 
2237 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2238 @*/
2239 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) {
2240   PetscFunctionBegin;
2241   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2242   PetscFunctionReturn(0);
2243 }
2244 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* read-write access: the caller may modify the values, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2270 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the caller may have written on the device, so the GPU holds the authoritative data */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2292 #endif
2293 
2294 /*@C
2295    MatCreateDense - Creates a matrix in `MATDENSE` format.
2296 
2297    Collective
2298 
2299    Input Parameters:
2300 +  comm - MPI communicator
2301 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2302 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2303 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2304 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2305 -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2306    to control all matrix memory allocation.
2307 
2308    Output Parameter:
2309 .  A - the matrix
2310 
2311    Notes:
2312    The dense format is fully compatible with standard Fortran 77
2313    storage by columns.
2314 
2315    Although local portions of the matrix are stored in column-major
2316    order, the matrix is partitioned across MPI ranks by row.
2317 
2318    The data input variable is intended primarily for Fortran programmers
2319    who wish to allocate their own matrix memory space.  Most users should
2320    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
2321 
2322    The user MUST specify either the local or global matrix dimensions
2323    (possibly both).
2324 
2325    Level: intermediate
2326 
2327 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2328 @*/
2329 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
2330   PetscFunctionBegin;
2331   PetscCall(MatCreate(comm, A));
2332   PetscCall(MatSetSizes(*A, m, n, M, N));
2333   PetscCall(MatSetType(*A, MATDENSE));
2334   PetscCall(MatSeqDenseSetPreallocation(*A, data));
2335   PetscCall(MatMPIDenseSetPreallocation(*A, data));
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* user-provided data must be given on all ranks or on none */
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  /* exactly one of these preallocations takes effect, depending on the communicator size */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
2371 #endif
2372 
/* Duplicate an MPIDENSE matrix: mirror A's layouts and flags, then duplicate the local dense block */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  /* the duplicate is created fully assembled, inheriting A's factorization state */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts by reference instead of rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  /* duplicate the local dense block; cpvalues controls whether the values are copied */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}
2400 
/* Load an MPIDENSE matrix from a viewer; only binary and (when available) HDF5 viewers are supported */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) {
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2425 
2426 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
2427   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2428   Mat           a, b;
2429 
2430   PetscFunctionBegin;
2431   a = matA->A;
2432   b = matB->A;
2433   PetscCall(MatEqual(a, b, flag));
2434   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
2439   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2440 
2441   PetscFunctionBegin;
2442   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2443   PetscCall(MatDestroy(&atb->atb));
2444   PetscCall(PetscFree(atb));
2445   PetscFunctionReturn(0);
2446 }
2447 
2448 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
2449   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2450 
2451   PetscFunctionBegin;
2452   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2453   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2454   PetscCall(PetscFree(abt));
2455   PetscFunctionReturn(0);
2456 }
2457 
/* Numeric phase of C = A^T * B: each rank computes its full local contribution
   A_local^T * B_local, then the contributions are summed and distributed by rows
   of C with MPI_Reduce_scatter */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: the rows destined for each rank are packed
     contiguously (column-major within each destination block) */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2501 
/* Symbolic phase of C = A^T * B: sizes C, allocates the reduce-scatter buffers,
   and attaches the reuse context to C->product */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B sums over rows, so A and B must own the same row range on each rank */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the full cM x cN local contribution; each rank receives its
     own row block of the summed product (recvcounts[i] = local rows * cN) */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2537 
/* Symbolic phase of C = A * B^T: sizes C and sets up buffers for either the
   "cyclic" (pipelined ring) or "allgatherv" communication algorithm */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = allgatherv, alg 1 = cyclic; anything not "allgatherv" selects cyclic */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic sends/receives from colliding with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2588 
/* Numeric phase of C = A * B^T, "cyclic" algorithm: each rank's packed B block is
   pipelined around the ring of ranks; at each step the local A is multiplied with
   the currently held block to fill the corresponding column slice of C. Communication
   of the next block overlaps the GEMM on the current one. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if the local B is already densely packed (lda == rows) it can be sent as-is,
     otherwise pack it column-by-column into the first ping-pong buffer */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, so the buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2670 
2671 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) {
2672   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2673   Mat_MatTransMultDense *abt;
2674   MPI_Comm               comm;
2675   PetscMPIInt            size;
2676   PetscScalar           *cv, *sendbuf, *recvbuf;
2677   const PetscScalar     *av, *bv;
2678   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2679   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2680   PetscBLASInt           cm, cn, ck, alda, clda;
2681 
2682   PetscFunctionBegin;
2683   MatCheckProduct(C, 3);
2684   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2685   abt = (Mat_MatTransMultDense *)C->product->data;
2686   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2687   PetscCallMPI(MPI_Comm_size(comm, &size));
2688   PetscCall(MatDenseGetArrayRead(a->A, &av));
2689   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2690   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2691   PetscCall(MatDenseGetLDA(a->A, &i));
2692   PetscCall(PetscBLASIntCast(i, &alda));
2693   PetscCall(MatDenseGetLDA(b->A, &blda));
2694   PetscCall(MatDenseGetLDA(c->A, &i));
2695   PetscCall(PetscBLASIntCast(i, &clda));
2696   /* copy transpose of B into buf[0] */
2697   bn      = B->rmap->n;
2698   sendbuf = abt->buf[0];
2699   recvbuf = abt->buf[1];
2700   for (k = 0, j = 0; j < bn; j++) {
2701     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2702   }
2703   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2704   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2705   PetscCall(PetscBLASIntCast(cK, &ck));
2706   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2707   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2708   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2709   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2710   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2711   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2712   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2713   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2718   Mat_MatTransMultDense *abt;
2719 
2720   PetscFunctionBegin;
2721   MatCheckProduct(C, 3);
2722   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2723   abt = (Mat_MatTransMultDense *)C->product->data;
2724   switch (abt->alg) {
2725   case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break;
2726   default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break;
2727   }
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) {
2732   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2733 
2734   PetscFunctionBegin;
2735   PetscCall(MatDestroy(&ab->Ce));
2736   PetscCall(MatDestroy(&ab->Ae));
2737   PetscCall(MatDestroy(&ab->Be));
2738   PetscCall(PetscFree(ab));
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 #if defined(PETSC_HAVE_ELEMENTAL)
2743 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
2744   Mat_MatMultDense *ab;
2745 
2746   PetscFunctionBegin;
2747   MatCheckProduct(C, 3);
2748   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2749   ab = (Mat_MatMultDense *)C->product->data;
2750   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2751   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2752   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2753   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
2758   Mat               Ae, Be, Ce;
2759   Mat_MatMultDense *ab;
2760 
2761   PetscFunctionBegin;
2762   MatCheckProduct(C, 4);
2763   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2764   /* check local size of A and B */
2765   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2766     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2767   }
2768 
2769   /* create elemental matrices Ae and Be */
2770   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2771   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2772   PetscCall(MatSetType(Ae, MATELEMENTAL));
2773   PetscCall(MatSetUp(Ae));
2774   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2775 
2776   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2777   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2778   PetscCall(MatSetType(Be, MATELEMENTAL));
2779   PetscCall(MatSetUp(Be));
2780   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2781 
2782   /* compute symbolic Ce = Ae*Be */
2783   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2784   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2785 
2786   /* setup C */
2787   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2788   PetscCall(MatSetType(C, MATDENSE));
2789   PetscCall(MatSetUp(C));
2790 
2791   /* create data structure for reuse Cdense */
2792   PetscCall(PetscNew(&ab));
2793   ab->Ae = Ae;
2794   ab->Be = Be;
2795   ab->Ce = Ce;
2796 
2797   C->product->data       = ab;
2798   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2799   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2800   PetscFunctionReturn(0);
2801 }
2802 #endif
2803 /* ----------------------------------------------- */
2804 #if defined(PETSC_HAVE_ELEMENTAL)
2805 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
2806   PetscFunctionBegin;
2807   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2808   C->ops->productsymbolic = MatProductSymbolic_AB;
2809   PetscFunctionReturn(0);
2810 }
2811 #endif
2812 
2813 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
2814   Mat_Product *product = C->product;
2815   Mat          A = product->A, B = product->B;
2816 
2817   PetscFunctionBegin;
2818   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2819     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2820   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2821   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2822   PetscFunctionReturn(0);
2823 }
2824 
/*
   Configure C for the A*B^T product of two MATMPIDENSE matrices: choose the
   communication algorithm ("allgatherv" by default, or "cyclic") from the
   options database and install the symbolic product callbacks.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name differs depending on whether the user
     went through the legacy MatMatTransposeMult() API or the MatProduct API */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  /* record the user's choice only if the option was actually given */
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2853 
2854 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
2855   Mat_Product *product = C->product;
2856 
2857   PetscFunctionBegin;
2858   switch (product->type) {
2859 #if defined(PETSC_HAVE_ELEMENTAL)
2860   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
2861 #endif
2862   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
2863   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
2864   default: break;
2865   }
2866   PetscFunctionReturn(0);
2867 }
2868