xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision b6e6beb40ac0a73ccc19b70153df96d5058dce43)

/*
   Basic functions for parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
  matrix that represents the operator. For a sequential matrix it returns the matrix itself.

  Not Collective

  Input Parameter:
. A - the sequential or MPI dense matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
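
/*
   A minimal usage sketch (illustrative; the names `X`, `Xloc`, and `lda` are
   hypothetical): fetch the local dense block of a parallel dense matrix and
   query its leading dimension. The returned matrix is a borrowed reference.

     Mat      Xloc;
     PetscInt lda;
     PetscCall(MatDenseGetLocalMatrix(X, &Xloc));
     PetscCall(MatDenseGetLDA(Xloc, &lda));
*/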

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
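
/*
   A worked note on the indexing above (derived from the code, not additional API):
   the local block is stored column-major with leading dimension lda, so local entry
   (i, j) sits at v[i + j * lda]. The global diagonal entry (j, j), with
   rstart <= j < rend, is local row j - rstart of column j, hence the address
   v[(j - rstart) + j * lda] shifted in the loop.
*/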

PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if defined(PETSC_HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif defined(PETSC_HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
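
/*
   A minimal usage sketch (illustrative; `X` is hypothetical): fetch the square
   diagonal block owned by this rank. The block is cached on the matrix, so the
   caller does not destroy it.

     Mat D;
     PetscCall(MatGetDiagonalBlock(X, &D));
*/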

PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
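
/*
   A minimal sketch of the set-values cycle this routine supports (values are
   illustrative): entries destined for rows owned by other ranks are stashed above
   and only communicated during assembly, so every insertion phase must be closed
   by MatAssemblyBegin()/MatAssemblyEnd().

     PetscInt    row = 0, col = 0; // may be owned by any rank
     PetscScalar val = 1.0;
     PetscCall(MatSetValues(X, 1, &row, 1, &col, &val, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(X, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(X, MAT_FINAL_ASSEMBLY));
*/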

PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195 {
196   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
197   MatType       mtype = MATSEQDENSE;
198 
199   PetscFunctionBegin;
200   if (!a->A) {
201     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202     PetscCall(PetscLayoutSetUp(A->rmap));
203     PetscCall(PetscLayoutSetUp(A->cmap));
204     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206 #if defined(PETSC_HAVE_CUDA)
207     PetscBool iscuda;
208     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209     if (iscuda) mtype = MATSEQDENSECUDA;
210 #elif (PETSC_HAVE_HIP)
211     PetscBool iship;
212     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213     if (iship) mtype = MATSEQDENSEHIP;
214 #endif
215     PetscCall(MatSetType(a->A, mtype));
216   }
217   PetscCall(MatDenseSetLDA(a->A, lda));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported: the submatrix must keep the
     same row partitioning as the original matrix. Each index set should really be
     checked to confirm that this holds. */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check the submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* TODO: the reused matrix should be checked for conforming row and column sizes,
       e.g. SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size"); */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
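
/*
   A minimal usage sketch (illustrative, assuming a square matrix X): extract the
   submatrix of all locally owned rows and the matching columns, keeping the
   original row partitioning (the only layout this routine supports).

     IS       rowcol;
     Mat      S;
     PetscInt rstart, rend;
     PetscCall(MatGetOwnershipRange(X, &rstart, &rend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &rowcol));
     PetscCall(MatCreateSubMatrix(X, rowcol, rowcol, MAT_INITIAL_MATRIX, &S));
     PetscCall(ISDestroy(&rowcol));
*/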

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
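
/*
   A minimal usage sketch (illustrative): zero out a global row, place `diag` on
   its diagonal, and fix the right-hand side so that b[row] = diag * x[row], as
   the loop above does for every zeroed local row.

     PetscInt    row  = 0;
     PetscScalar diag = 1.0;
     PetscCall(MatZeroRows(X, 1, &row, diag, x, b));
*/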

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
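
/*
   A minimal usage sketch (illustrative): the routine above first broadcasts the
   needed entries of xx into the work vector lvec through the Mvctx star forest
   and then applies the sequential kernel; callers simply use MatMult().

     Vec x, y;
     PetscCall(MatCreateVecs(X, &x, &y));
     PetscCall(VecSet(x, 1.0));
     PetscCall(MatMult(X, x, y));
*/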

PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len = PetscMin(a->A->rmap->n, a->A->cmap->n);
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  radd = A->rmap->rstart * lda; /* offset of column rstart in column-major storage; must use lda, not the local row count */
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix: this is not the most efficient approach, but it is simple */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
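
/*
   A minimal usage sketch (illustrative): print layout information rather than all
   entries by pushing an info format onto the viewer first.

     PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO_DETAIL));
     PetscCall(MatView(X, PETSC_VIEWER_STDOUT_WORLD));
     PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/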

PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
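
/*
   A minimal usage sketch (illustrative): query globally summed storage statistics.

     MatInfo info;
     PetscCall(MatGetInfo(X, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nz used %g, memory %g\n", info.nz_used, info.memory));
*/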

PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the local diagonal block is square, it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
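
/*
   A minimal usage sketch (illustrative): if each rank only sets entries in rows it
   owns, disabling the off-process stash skips the assembly communication setup.

     PetscCall(MatSetOption(X, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE));
*/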

PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
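
/*
   A minimal usage sketch (illustrative): the routine above computes
   X <- diag(ll) * X * diag(rr), where ll conforms to the row layout and rr to the
   column layout, so MatCreateVecs() supplies compatible vectors.

     Vec l, r;
     PetscCall(MatCreateVecs(X, &r, &l)); // r: column layout, l: row layout
     PetscCall(VecSet(l, 2.0));
     PetscCall(VecSet(r, 0.5));
     PetscCall(MatDiagonalScale(X, l, r));
*/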

PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
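
/*
   A minimal usage sketch (illustrative): the norms supported in parallel are
   NORM_1 (maximum column sum), NORM_FROBENIUS (global sum of squares, then a
   square root), and NORM_INFINITY (maximum row sum); the 2-norm is not available.

     PetscReal nrm;
     PetscCall(MatNorm(X, NORM_FROBENIUS, &nrm));
*/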

PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
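
/*
   A minimal usage sketch (illustrative): column norms via the public wrapper.
   As above, 2-norms are reduced as squared sums and square-rooted afterwards,
   while mean reductions divide the summed columns by the global row count.

     PetscReal *colnorms;
     PetscInt   N;
     PetscCall(MatGetSize(X, NULL, &N));
     PetscCall(PetscMalloc1(N, &colnorms));
     PetscCall(MatGetColumnNorms(X, NORM_2, colnorms));
     PetscCall(PetscFree(colnorms));
*/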

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1285 
1286 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1287 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1288 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1289 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1290 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1291 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1292 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1293 
1294 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
1295 {
1296   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1297 
1298   PetscFunctionBegin;
1299   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1300   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1301   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1302   mat->boundtocpu = bind;
1303   if (!bind) {
1304     PetscBool iscuda;
1305 
1306     PetscCall(PetscFree(mat->defaultrandtype));
1307     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1308     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
1309     if (!iscuda) PetscCall(VecDestroy(&d->cvec));
1310     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
1311     if (!iscuda) PetscCall(MatDestroy(&d->cmat));
1312     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
1313     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
1314     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
1315     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1316     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
1317     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1318     mat->ops->shift = MatShift_MPIDenseCUDA;
1319   } else {
1320     PetscCall(PetscFree(mat->defaultrandtype));
1321     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1322     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1323     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1324     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1325     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1326     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1327     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1328     mat->ops->shift = MatShift_MPIDense;
1329   }
1330   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1331   PetscFunctionReturn(PETSC_SUCCESS);
1332 }
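
/*
   Usage sketch (editorial note, not part of the original source): MatBindToCPU() is the
   public entry point that dispatches to the routine above. Binding forces subsequent
   operations onto the host; unbinding re-installs the CUDA column-vector callbacks and
   the device MatShift():

     MatBindToCPU(A, PETSC_TRUE);   // run subsequent operations on the CPU
     MatBindToCPU(A, PETSC_FALSE);  // allow GPU execution again
*/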
1333 
1334 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1335 {
1336   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1337   PetscBool     iscuda;
1338 
1339   PetscFunctionBegin;
1340   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1341   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1342   if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS);
1343   PetscCall(PetscLayoutSetUp(A->rmap));
1344   PetscCall(PetscLayoutSetUp(A->cmap));
1345   if (!d->A) {
1346     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1347     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1348   }
1349   PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
1350   PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1351   A->preallocated = PETSC_TRUE;
1352   A->assembled    = PETSC_TRUE;
1353   PetscFunctionReturn(PETSC_SUCCESS);
1354 }
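
/*
   Usage sketch (editorial note, not part of the original source): preallocating with a
   user-provided device buffer. "d_data" stands for a column-major GPU array with at least
   lda*N entries on each rank; pass NULL to let PETSc allocate the storage. "m" and "N" are
   illustrative sizes:

     Mat A;
     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, m, PETSC_DECIDE, PETSC_DECIDE, N);
     MatSetType(A, MATMPIDENSECUDA);
     MatMPIDenseCUDASetPreallocation(A, d_data);
*/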
1355 #endif
1356 
1357 #if defined(PETSC_HAVE_HIP)
1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha)
1359 {
1360   PetscScalar *da;
1361   PetscInt     lda;
1362 
1363   PetscFunctionBegin;
1364   PetscCall(MatDenseHIPGetArray(A, &da));
1365   PetscCall(MatDenseGetLDA(A, &lda));
1366   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1367   PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1368   PetscCall(MatDenseHIPRestoreArray(A, &da));
1369   PetscFunctionReturn(PETSC_SUCCESS);
1370 }
1371 
1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1373 {
1374   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1375   PetscInt      lda;
1376 
1377   PetscFunctionBegin;
1378   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1379   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1380   if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1381   a->vecinuse = col + 1;
1382   PetscCall(MatDenseGetLDA(a->A, &lda));
1383   PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1384   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1385   *v = a->cvec;
1386   PetscFunctionReturn(PETSC_SUCCESS);
1387 }
1388 
1389 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1390 {
1391   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1392 
1393   PetscFunctionBegin;
1394   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1395   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1396   a->vecinuse = 0;
1397   PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1398   PetscCall(VecHIPResetArray(a->cvec));
1399   if (v) *v = NULL;
1400   PetscFunctionReturn(PETSC_SUCCESS);
1401 }
1402 
1403 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1404 {
1405   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1406   PetscInt      lda;
1407 
1408   PetscFunctionBegin;
1409   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1410   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1411   if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1412   a->vecinuse = col + 1;
1413   PetscCall(MatDenseGetLDA(a->A, &lda));
1414   PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse));
1415   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1416   PetscCall(VecLockReadPush(a->cvec));
1417   *v = a->cvec;
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
1421 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1422 {
1423   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1424 
1425   PetscFunctionBegin;
1426   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecRead() first");
1427   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1428   a->vecinuse = 0;
1429   PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse));
1430   PetscCall(VecLockReadPop(a->cvec));
1431   PetscCall(VecHIPResetArray(a->cvec));
1432   if (v) *v = NULL;
1433   PetscFunctionReturn(PETSC_SUCCESS);
1434 }
1435 
1436 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1437 {
1438   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1439   PetscInt      lda;
1440 
1441   PetscFunctionBegin;
1442   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1443   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1444   if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1445   a->vecinuse = col + 1;
1446   PetscCall(MatDenseGetLDA(a->A, &lda));
1447   PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1448   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1449   *v = a->cvec;
1450   PetscFunctionReturn(PETSC_SUCCESS);
1451 }
1452 
1453 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1454 {
1455   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1456 
1457   PetscFunctionBegin;
1458   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecWrite() first");
1459   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1460   a->vecinuse = 0;
1461   PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1462   PetscCall(VecHIPResetArray(a->cvec));
1463   if (v) *v = NULL;
1464   PetscFunctionReturn(PETSC_SUCCESS);
1465 }
1466 
1467 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1468 {
1469   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1470 
1471   PetscFunctionBegin;
1472   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1473   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1474   PetscCall(MatDenseHIPPlaceArray(l->A, a));
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
1478 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A)
1479 {
1480   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1481 
1482   PetscFunctionBegin;
1483   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1484   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1485   PetscCall(MatDenseHIPResetArray(l->A));
1486   PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488 
1489 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1490 {
1491   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1492 
1493   PetscFunctionBegin;
1494   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1495   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1496   PetscCall(MatDenseHIPReplaceArray(l->A, a));
1497   PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499 
1500 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1501 {
1502   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1503 
1504   PetscFunctionBegin;
1505   PetscCall(MatDenseHIPGetArrayWrite(l->A, a));
1506   PetscFunctionReturn(PETSC_SUCCESS);
1507 }
1508 
1509 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1510 {
1511   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1512 
1513   PetscFunctionBegin;
1514   PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a));
1515   PetscFunctionReturn(PETSC_SUCCESS);
1516 }
1517 
1518 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1519 {
1520   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1521 
1522   PetscFunctionBegin;
1523   PetscCall(MatDenseHIPGetArrayRead(l->A, a));
1524   PetscFunctionReturn(PETSC_SUCCESS);
1525 }
1526 
1527 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1528 {
1529   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1530 
1531   PetscFunctionBegin;
1532   PetscCall(MatDenseHIPRestoreArrayRead(l->A, a));
1533   PetscFunctionReturn(PETSC_SUCCESS);
1534 }
1535 
1536 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a)
1537 {
1538   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1539 
1540   PetscFunctionBegin;
1541   PetscCall(MatDenseHIPGetArray(l->A, a));
1542   PetscFunctionReturn(PETSC_SUCCESS);
1543 }
1544 
1545 static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a)
1546 {
1547   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1548 
1549   PetscFunctionBegin;
1550   PetscCall(MatDenseHIPRestoreArray(l->A, a));
1551   PetscFunctionReturn(PETSC_SUCCESS);
1552 }
1553 
1554 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1555 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1556 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1557 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1558 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1559 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1560 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1561 
1562 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind)
1563 {
1564   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1565 
1566   PetscFunctionBegin;
1567   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1568   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1569   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1570   mat->boundtocpu = bind;
1571   if (!bind) {
1572     PetscBool iship;
1573 
1574     PetscCall(PetscFree(mat->defaultrandtype));
1575     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1576     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iship));
1577     if (!iship) PetscCall(VecDestroy(&d->cvec));
1578     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iship));
1579     if (!iship) PetscCall(MatDestroy(&d->cmat));
1580     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP));
1581     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP));
1582     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP));
1583     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP));
1584     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP));
1585     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP));
1586     mat->ops->shift = MatShift_MPIDenseHIP;
1587   } else {
1588     PetscCall(PetscFree(mat->defaultrandtype));
1589     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1590     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1591     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1592     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1593     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1594     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1595     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1596     mat->ops->shift = MatShift_MPIDense;
1597   }
1598   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
1602 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data)
1603 {
1604   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1605   PetscBool     iship;
1606 
1607   PetscFunctionBegin;
1608   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1609   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
1610   if (!iship) PetscFunctionReturn(PETSC_SUCCESS);
1611   PetscCall(PetscLayoutSetUp(A->rmap));
1612   PetscCall(PetscLayoutSetUp(A->cmap));
1613   if (!d->A) {
1614     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1615     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1616   }
1617   PetscCall(MatSetType(d->A, MATSEQDENSEHIP));
1618   PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data));
1619   A->preallocated = PETSC_TRUE;
1620   A->assembled    = PETSC_TRUE;
1621   PetscFunctionReturn(PETSC_SUCCESS);
1622 }
1623 #endif
1624 
1625 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1626 {
1627   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(MatSetRandom(d->A, rctx));
1631 #if defined(PETSC_HAVE_DEVICE)
1632   x->offloadmask = d->A->offloadmask;
1633 #endif
1634   PetscFunctionReturn(PETSC_SUCCESS);
1635 }
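
/*
   Usage sketch (editorial note, not part of the original source): filling a parallel dense
   matrix with random entries; per the MatBindToCPU routines above, the random type defaults
   to curand while the matrix is unbound from the CPU:

     PetscRandom r;
     PetscRandomCreate(PETSC_COMM_WORLD, &r);
     PetscRandomSetFromOptions(r);
     MatSetRandom(A, r);
     PetscRandomDestroy(&r);
*/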
1636 
1637 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1638 {
1639   PetscFunctionBegin;
1640   *missing = PETSC_FALSE;
1641   PetscFunctionReturn(PETSC_SUCCESS);
1642 }
1643 
1644 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1645 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1646 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1647 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1648 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1649 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1650 
1651 /* -------------------------------------------------------------------*/
1652 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1653                                        MatGetRow_MPIDense,
1654                                        MatRestoreRow_MPIDense,
1655                                        MatMult_MPIDense,
1656                                        /*  4*/ MatMultAdd_MPIDense,
1657                                        MatMultTranspose_MPIDense,
1658                                        MatMultTransposeAdd_MPIDense,
1659                                        NULL,
1660                                        NULL,
1661                                        NULL,
1662                                        /* 10*/ NULL,
1663                                        NULL,
1664                                        NULL,
1665                                        NULL,
1666                                        MatTranspose_MPIDense,
1667                                        /* 15*/ MatGetInfo_MPIDense,
1668                                        MatEqual_MPIDense,
1669                                        MatGetDiagonal_MPIDense,
1670                                        MatDiagonalScale_MPIDense,
1671                                        MatNorm_MPIDense,
1672                                        /* 20*/ MatAssemblyBegin_MPIDense,
1673                                        MatAssemblyEnd_MPIDense,
1674                                        MatSetOption_MPIDense,
1675                                        MatZeroEntries_MPIDense,
1676                                        /* 24*/ MatZeroRows_MPIDense,
1677                                        NULL,
1678                                        NULL,
1679                                        NULL,
1680                                        NULL,
1681                                        /* 29*/ MatSetUp_MPIDense,
1682                                        NULL,
1683                                        NULL,
1684                                        MatGetDiagonalBlock_MPIDense,
1685                                        NULL,
1686                                        /* 34*/ MatDuplicate_MPIDense,
1687                                        NULL,
1688                                        NULL,
1689                                        NULL,
1690                                        NULL,
1691                                        /* 39*/ MatAXPY_MPIDense,
1692                                        MatCreateSubMatrices_MPIDense,
1693                                        NULL,
1694                                        MatGetValues_MPIDense,
1695                                        MatCopy_MPIDense,
1696                                        /* 44*/ NULL,
1697                                        MatScale_MPIDense,
1698                                        MatShift_MPIDense,
1699                                        NULL,
1700                                        NULL,
1701                                        /* 49*/ MatSetRandom_MPIDense,
1702                                        NULL,
1703                                        NULL,
1704                                        NULL,
1705                                        NULL,
1706                                        /* 54*/ NULL,
1707                                        NULL,
1708                                        NULL,
1709                                        NULL,
1710                                        NULL,
1711                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1712                                        MatDestroy_MPIDense,
1713                                        MatView_MPIDense,
1714                                        NULL,
1715                                        NULL,
1716                                        /* 64*/ NULL,
1717                                        NULL,
1718                                        NULL,
1719                                        NULL,
1720                                        NULL,
1721                                        /* 69*/ NULL,
1722                                        NULL,
1723                                        NULL,
1724                                        NULL,
1725                                        NULL,
1726                                        /* 74*/ NULL,
1727                                        NULL,
1728                                        NULL,
1729                                        NULL,
1730                                        NULL,
1731                                        /* 79*/ NULL,
1732                                        NULL,
1733                                        NULL,
1734                                        NULL,
1735                                        /* 83*/ MatLoad_MPIDense,
1736                                        NULL,
1737                                        NULL,
1738                                        NULL,
1739                                        NULL,
1740                                        NULL,
1741                                        /* 89*/ NULL,
1742                                        NULL,
1743                                        NULL,
1744                                        NULL,
1745                                        NULL,
1746                                        /* 94*/ NULL,
1747                                        NULL,
1748                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1749                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1750                                        NULL,
1751                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1752                                        NULL,
1753                                        NULL,
1754                                        MatConjugate_MPIDense,
1755                                        NULL,
1756                                        /*104*/ NULL,
1757                                        MatRealPart_MPIDense,
1758                                        MatImaginaryPart_MPIDense,
1759                                        NULL,
1760                                        NULL,
1761                                        /*109*/ NULL,
1762                                        NULL,
1763                                        NULL,
1764                                        MatGetColumnVector_MPIDense,
1765                                        MatMissingDiagonal_MPIDense,
1766                                        /*114*/ NULL,
1767                                        NULL,
1768                                        NULL,
1769                                        NULL,
1770                                        NULL,
1771                                        /*119*/ NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        NULL,
1775                                        NULL,
1776                                        /*124*/ NULL,
1777                                        MatGetColumnReductions_MPIDense,
1778                                        NULL,
1779                                        NULL,
1780                                        NULL,
1781                                        /*129*/ NULL,
1782                                        NULL,
1783                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1784                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1785                                        NULL,
1786                                        /*134*/ NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        NULL,
1791                                        /*139*/ NULL,
1792                                        NULL,
1793                                        NULL,
1794                                        NULL,
1795                                        NULL,
1796                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1797                                        /*145*/ NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        NULL,
1801                                        NULL,
1802                                        /*150*/ NULL,
1803                                        NULL};
1804 
1805 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1806 {
1807   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1808   MatType       mtype = MATSEQDENSE;
1809 
1810   PetscFunctionBegin;
1811   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1812   PetscCall(PetscLayoutSetUp(mat->rmap));
1813   PetscCall(PetscLayoutSetUp(mat->cmap));
1814   if (!a->A) {
1815     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1816     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1817   }
1818 #if defined(PETSC_HAVE_CUDA)
1819   PetscBool iscuda;
1820   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1821   if (iscuda) mtype = MATSEQDENSECUDA;
1822 #endif
1823 #if defined(PETSC_HAVE_HIP)
1824   PetscBool iship;
1825   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1826   if (iship) mtype = MATSEQDENSEHIP;
1827 #endif
1828   PetscCall(MatSetType(a->A, mtype));
1829   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1830 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1831   mat->offloadmask = a->A->offloadmask;
1832 #endif
1833   mat->preallocated = PETSC_TRUE;
1834   mat->assembled    = PETSC_TRUE;
1835   PetscFunctionReturn(PETSC_SUCCESS);
1836 }
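
/*
   Usage sketch (editorial note, not part of the original source): this routine backs the
   public MatMPIDenseSetPreallocation(). "data" may be a user column-major host array whose
   leading dimension equals the number of local rows, or NULL to have PETSc allocate:

     Mat A;
     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, m, PETSC_DECIDE, PETSC_DECIDE, N);
     MatSetType(A, MATMPIDENSE);
     MatMPIDenseSetPreallocation(A, NULL);
*/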
1837 
1838 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1839 {
1840   Mat B, C;
1841 
1842   PetscFunctionBegin;
1843   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1844   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1845   PetscCall(MatDestroy(&C));
1846   if (reuse == MAT_REUSE_MATRIX) {
1847     C = *newmat;
1848   } else C = NULL;
1849   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1850   PetscCall(MatDestroy(&B));
1851   if (reuse == MAT_INPLACE_MATRIX) {
1852     PetscCall(MatHeaderReplace(A, &C));
1853   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1854   PetscFunctionReturn(PETSC_SUCCESS);
1855 }
1856 
1857 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1858 {
1859   Mat B, C;
1860 
1861   PetscFunctionBegin;
1862   PetscCall(MatDenseGetLocalMatrix(A, &C));
1863   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1864   if (reuse == MAT_REUSE_MATRIX) {
1865     C = *newmat;
1866   } else C = NULL;
1867   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1868   PetscCall(MatDestroy(&B));
1869   if (reuse == MAT_INPLACE_MATRIX) {
1870     PetscCall(MatHeaderReplace(A, &C));
1871   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1872   PetscFunctionReturn(PETSC_SUCCESS);
1873 }
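
/*
   Usage sketch (editorial note, not part of the original source): both conversion routines
   above are reached through MatConvert(). "Aaij" stands for a previously assembled
   MATMPIAIJ matrix:

     Mat Adense, Back;
     MatConvert(Aaij, MATMPIDENSE, MAT_INITIAL_MATRIX, &Adense);
     MatConvert(Adense, MATMPIAIJ, MAT_INITIAL_MATRIX, &Back);
*/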
1874 
1875 #if defined(PETSC_HAVE_ELEMENTAL)
1876 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1877 {
1878   Mat          mat_elemental;
1879   PetscScalar *v;
1880   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1881 
1882   PetscFunctionBegin;
1883   if (reuse == MAT_REUSE_MATRIX) {
1884     mat_elemental = *newmat;
1885     PetscCall(MatZeroEntries(*newmat));
1886   } else {
1887     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1888     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1889     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1890     PetscCall(MatSetUp(mat_elemental));
1891     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1892   }
1893 
1894   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1895   for (i = 0; i < N; i++) cols[i] = i;
1896   for (i = 0; i < m; i++) rows[i] = rstart + i;
1897 
1898   /* PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1899   PetscCall(MatDenseGetArray(A, &v));
1900   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1901   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1902   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1903   PetscCall(MatDenseRestoreArray(A, &v));
1904   PetscCall(PetscFree2(rows, cols));
1905 
1906   if (reuse == MAT_INPLACE_MATRIX) {
1907     PetscCall(MatHeaderReplace(A, &mat_elemental));
1908   } else {
1909     *newmat = mat_elemental;
1910   }
1911   PetscFunctionReturn(PETSC_SUCCESS);
1912 }
1913 #endif
1914 
1915 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1916 {
1917   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1918 
1919   PetscFunctionBegin;
1920   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1921   PetscFunctionReturn(PETSC_SUCCESS);
1922 }
1923 
1924 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1925 {
1926   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1927 
1928   PetscFunctionBegin;
1929   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1930   PetscFunctionReturn(PETSC_SUCCESS);
1931 }
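
/*
   Usage sketch (editorial note, not part of the original source): MatDenseGetColumn()
   exposes the locally owned entries of one global column as a raw pointer and must be
   paired with MatDenseRestoreColumn(). "nlocal" stands for the local row count:

     PetscScalar *vals;
     MatDenseGetColumn(A, col, &vals); // vals[i] is row rstart+i of column col
     for (PetscInt i = 0; i < nlocal; i++) vals[i] *= 2.0;
     MatDenseRestoreColumn(A, &vals);
*/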
1932 
1933 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1934 {
1935   Mat_MPIDense *mat;
1936   PetscInt      m, nloc, N;
1937 
1938   PetscFunctionBegin;
1939   PetscCall(MatGetSize(inmat, &m, &N));
1940   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1941   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1942     PetscInt sum;
1943 
1944     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1945     /* Check sum(n) = N */
1946     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1947     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1948 
1949     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1950     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1951   }
1952 
1953   /* numeric phase */
1954   mat = (Mat_MPIDense *)(*outmat)->data;
1955   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1956   PetscFunctionReturn(PETSC_SUCCESS);
1957 }
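
/*
   Usage sketch (editorial note, not part of the original source): each rank contributes a
   sequential dense matrix whose rows become that rank's local rows of the parallel result;
   the local column count may be PETSC_DECIDE. "mlocal" and "N" are illustrative sizes:

     Mat seq, mpi;
     MatCreateSeqDense(PETSC_COMM_SELF, mlocal, N, NULL, &seq);
     // ... fill seq, e.g. with MatSetValues() ...
     MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seq, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpi);
*/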
1958 
1959 #if defined(PETSC_HAVE_CUDA)
1960 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1961 {
1962   Mat           B;
1963   Mat_MPIDense *m;
1964 
1965   PetscFunctionBegin;
1966   if (reuse == MAT_INITIAL_MATRIX) {
1967     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1968   } else if (reuse == MAT_REUSE_MATRIX) {
1969     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1970   }
1971 
1972   B = *newmat;
1973   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
1974   PetscCall(PetscFree(B->defaultvectype));
1975   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
1976   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
1977   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
1978   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
1979   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
1980   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
1981   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
1982   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
1983   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
1984   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
1985   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
1986   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
1987   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
1988   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
1989   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
1990   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
1991   m = (Mat_MPIDense *)B->data;
1992   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
1993   B->ops->bindtocpu = NULL;
1994   B->offloadmask    = PETSC_OFFLOAD_CPU;
1995   PetscFunctionReturn(PETSC_SUCCESS);
1996 }
1997 
1998 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1999 {
2000   Mat           B;
2001   Mat_MPIDense *m;
2002 
2003   PetscFunctionBegin;
2004   if (reuse == MAT_INITIAL_MATRIX) {
2005     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2006   } else if (reuse == MAT_REUSE_MATRIX) {
2007     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2008   }
2009 
2010   B = *newmat;
2011   PetscCall(PetscFree(B->defaultvectype));
2012   PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
2013   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
2014   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
2015   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2016   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2017   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2018   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2019   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
2020   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
2021   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
2022   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
2023   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
2024   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
2025   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
2026   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
2027   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
2028   m = (Mat_MPIDense *)(B->data);
2029   if (m->A) {
2030     PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
2031     B->offloadmask = PETSC_OFFLOAD_BOTH;
2032   } else {
2033     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2034   }
2035   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));
2036 
2037   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
2038   PetscFunctionReturn(PETSC_SUCCESS);
2039 }
2040 #endif
2041 
2042 #if defined(PETSC_HAVE_HIP)
2043 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2044 {
2045   Mat           B;
2046   Mat_MPIDense *m;
2047 
2048   PetscFunctionBegin;
2049   if (reuse == MAT_INITIAL_MATRIX) {
2050     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2051   } else if (reuse == MAT_REUSE_MATRIX) {
2052     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2053   }
2054 
2055   B = *newmat;
2056   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE));
2057   PetscCall(PetscFree(B->defaultvectype));
2058   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
2059   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
2060   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL));
2061   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
2062   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
2063   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
2064   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
2065   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL));
2066   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL));
2067   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL));
2068   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL));
2069   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL));
2070   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL));
2071   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL));
2072   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL));
2073   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL));
2074   m = (Mat_MPIDense *)B->data;
2075   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2076   B->ops->bindtocpu = NULL;
2077   B->offloadmask    = PETSC_OFFLOAD_CPU;
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2082 {
2083   Mat           B;
2084   Mat_MPIDense *m;
2085 
2086   PetscFunctionBegin;
2087   if (reuse == MAT_INITIAL_MATRIX) {
2088     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2089   } else if (reuse == MAT_REUSE_MATRIX) {
2090     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2091   }
2092 
2093   B = *newmat;
2094   PetscCall(PetscFree(B->defaultvectype));
2095   PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype));
2096   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP));
2097   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense));
2098   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2099   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2100   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2101   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2102   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP));
2103   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP));
2104   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP));
2105   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP));
2106   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP));
2107   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP));
2108   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP));
2109   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP));
2110   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP));
2111   m = (Mat_MPIDense *)(B->data);
2112   if (m->A) {
2113     PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A));
2114     B->offloadmask = PETSC_OFFLOAD_BOTH;
2115   } else {
2116     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2117   }
2118   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE));
2119 
2120   B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP;
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 #endif
2124 
2125 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2126 {
2127   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2128   PetscInt      lda;
2129 
2130   PetscFunctionBegin;
2131   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2132   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2133   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2134   a->vecinuse = col + 1;
2135   PetscCall(MatDenseGetLDA(a->A, &lda));
2136   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
2137   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2138   *v = a->cvec;
2139   PetscFunctionReturn(PETSC_SUCCESS);
2140 }
2141 
2142 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2143 {
2144   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2145 
2146   PetscFunctionBegin;
2147   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2148   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2149   a->vecinuse = 0;
2150   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
2151   PetscCall(VecResetArray(a->cvec));
2152   if (v) *v = NULL;
2153   PetscFunctionReturn(PETSC_SUCCESS);
2154 }
2155 
2156 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2157 {
2158   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2159   PetscInt      lda;
2160 
2161   PetscFunctionBegin;
2162   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2163   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2164   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2165   a->vecinuse = col + 1;
2166   PetscCall(MatDenseGetLDA(a->A, &lda));
2167   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
2168   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2169   PetscCall(VecLockReadPush(a->cvec));
2170   *v = a->cvec;
2171   PetscFunctionReturn(PETSC_SUCCESS);
2172 }
2173 
2174 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2175 {
2176   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2177 
2178   PetscFunctionBegin;
2179   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecRead() first");
2180   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2181   a->vecinuse = 0;
2182   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
2183   PetscCall(VecLockReadPop(a->cvec));
2184   PetscCall(VecResetArray(a->cvec));
2185   if (v) *v = NULL;
2186   PetscFunctionReturn(PETSC_SUCCESS);
2187 }
2188 
2189 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2190 {
2191   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2192   PetscInt      lda;
2193 
2194   PetscFunctionBegin;
2195   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2196   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2197   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2198   a->vecinuse = col + 1;
2199   PetscCall(MatDenseGetLDA(a->A, &lda));
2200   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2201   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2202   *v = a->cvec;
2203   PetscFunctionReturn(PETSC_SUCCESS);
2204 }
2205 
2206 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2207 {
2208   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2209 
2210   PetscFunctionBegin;
2211   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecWrite() first");
2212   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2213   a->vecinuse = 0;
2214   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2215   PetscCall(VecResetArray(a->cvec));
2216   if (v) *v = NULL;
2217   PetscFunctionReturn(PETSC_SUCCESS);
2218 }
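
/*
   Usage sketch (editorial note, not part of the original source): the get/restore pairs
   above share the single internal work vector a->cvec, so at most one column may be
   outstanding at a time and every access must be bracketed:

     Vec       col;
     PetscReal nrm;
     MatDenseGetColumnVecRead(A, j, &col);
     VecNorm(col, NORM_2, &nrm);
     MatDenseRestoreColumnVecRead(A, j, &col);
*/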
2219 
2220 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
2221 {
2222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2223   Mat_MPIDense *c;
2224   MPI_Comm      comm;
2225   PetscInt      pbegin, pend;
2226 
2227   PetscFunctionBegin;
2228   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2229   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2230   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2231   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
2232   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
2233   if (!a->cmat) {
2234     PetscCall(MatCreate(comm, &a->cmat));
2235     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
2236     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
2237     else {
2238       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2239       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2240       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2241     }
2242     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2243     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2244   } else {
2245     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
2246     if (same && a->cmat->rmap->N != A->rmap->N) {
2247       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
2248       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, comm));
2249     }
2250     if (!same) {
2251       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
2252       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
2253       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2254       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2255       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2256     }
2257     if (cend - cbegin != a->cmat->cmap->N) {
2258       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
2259       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
2260       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2261       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2262     }
2263   }
2264   c = (Mat_MPIDense *)a->cmat->data;
2265   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2266   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
2267 
2268   a->cmat->preallocated = PETSC_TRUE;
2269   a->cmat->assembled    = PETSC_TRUE;
2270 #if defined(PETSC_HAVE_DEVICE)
2271   a->cmat->offloadmask = c->A->offloadmask;
2272 #endif
2273   a->matinuse = cbegin + 1;
2274   *v          = a->cmat;
2275   PetscFunctionReturn(PETSC_SUCCESS);
2276 }
2277 
2278 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
2279 {
2280   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2281   Mat_MPIDense *c;
2282 
2283   PetscFunctionBegin;
2284   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
2285   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
2286   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
2287   a->matinuse = 0;
2288   c           = (Mat_MPIDense *)a->cmat->data;
2289   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
2290   if (v) *v = NULL;
2291 #if defined(PETSC_HAVE_DEVICE)
2292   A->offloadmask = a->A->offloadmask;
2293 #endif
2294   PetscFunctionReturn(PETSC_SUCCESS);
2295 }
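
/*
   Usage sketch (editorial note, not part of the original source): the sub-matrix is a view
   aliasing the parent's storage and must be restored before another view or column vector
   is requested; PETSC_DECIDE for the row bounds selects the full row range:

     Mat sub;
     MatDenseGetSubMatrix(A, PETSC_DECIDE, PETSC_DECIDE, c0, c1, &sub);
     MatMult(sub, x, y);
     MatDenseRestoreSubMatrix(A, &sub);
*/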
2296 
2297 /*MC
2298    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
2299 
2300    Options Database Keys:
2301 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
2302 
2303   Level: beginner
2304 
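   Example Usage (editorial sketch; "m" and "N" are illustrative sizes):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, m, PETSC_DECIDE, PETSC_DECIDE, N);
   MatSetType(A, MATMPIDENSE);
   MatSetUp(A);
.ve
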
2305 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
2306 M*/
2307 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
2308 {
2309   Mat_MPIDense *a;
2310 
2311   PetscFunctionBegin;
2312   PetscCall(PetscNew(&a));
2313   mat->data = (void *)a;
2314   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
2315 
2316   mat->insertmode = NOT_SET_VALUES;
2317 
2318   /* build the stash cache for off-processor entries generated during MatSetValues() */
2319   a->donotstash = PETSC_FALSE;
2320 
2321   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
2322 
2323   /* work vector and scatter context used for matrix-vector products */
2324   a->lvec        = NULL;
2325   a->Mvctx       = NULL;
2326   a->roworiented = PETSC_TRUE;
2327 
2328   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
2329   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
2330   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
2331   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
2332   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
2333   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
2334   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
2335   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
2336   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
2337   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
2338   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
2339   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
2340   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
2341   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
2342   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
2343   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
2344   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
2345   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
2346   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
2347   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
2348   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
2349 #if defined(PETSC_HAVE_ELEMENTAL)
2350   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
2351 #endif
2352 #if defined(PETSC_HAVE_SCALAPACK)
2353   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
2354 #endif
2355 #if defined(PETSC_HAVE_CUDA)
2356   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
2357 #endif
2358   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
2359   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2360   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2361 #if defined(PETSC_HAVE_CUDA)
2362   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2363   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2364 #endif
2365 #if defined(PETSC_HAVE_HIP)
2366   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
2367   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2368   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2369 #endif
2370   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
2371   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
2372   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 
2376 /*MC
2377    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2378 
2379    Options Database Keys:
2380 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
2381 
2382   Level: beginner
2383 
2384 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2385 M*/
2386 #if defined(PETSC_HAVE_CUDA)
2387   #include <petsc/private/deviceimpl.h>
2388 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2389 {
2390   PetscFunctionBegin;
2391   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
2392   PetscCall(MatCreate_MPIDense(B));
2393   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
2394   PetscFunctionReturn(PETSC_SUCCESS);
2395 }
2396 #endif
2397 
2398 /*MC
2399    MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs.
2400 
2401    Options Database Keys:
2402 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()`
2403 
2404   Level: beginner
2405 
2406 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2407 M*/
2408 #if defined(PETSC_HAVE_HIP)
2409   #include <petsc/private/deviceimpl.h>
2410 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B)
2411 {
2412   PetscFunctionBegin;
2413   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
2414   PetscCall(MatCreate_MPIDense(B));
2415   PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B));
2416   PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418 #endif
2419 
2420 /*MC
2421    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2422 
2423    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2424    and `MATMPIDENSE` otherwise.
2425 
2426    Options Database Keys:
2427 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
2428 
2429   Level: beginner
2430 
2431 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
2432 M*/
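/*
   A sketch of how MATDENSE resolves by communicator; A is assumed already created
   on PETSC_COMM_WORLD:

     PetscCall(MatSetType(A, MATDENSE));    becomes MATSEQDENSE on one rank, MATMPIDENSE otherwise
*/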
2433 
2434 /*MC
2435    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2438 
2439    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2440    and `MATMPIDENSECUDA` otherwise.
2441 
2442    Options Database Keys:
2443 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
2444 
2445   Level: beginner
2446 
2447 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2448 M*/
2449 
2450 /*MC
2451    MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs.
2452 
2453    This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator,
2454    and `MATMPIDENSEHIP` otherwise.
2455 
2456    Options Database Keys:
2457 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()`
2458 
2459   Level: beginner
2460 
2461 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2462 M*/
2463 
2464 /*@C
2465    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2466 
2467    Collective
2468 
2469    Input Parameters:
2470 +  B - the matrix
2471 -  data - optional location of matrix data.  Set data=NULL for PETSc
2472    to control all matrix memory allocation.
2473 
2474    Notes:
2475    The dense format is fully compatible with standard Fortran 77
2476    storage by columns.
2477 
2478    The data input variable is intended primarily for Fortran programmers
2479    who wish to allocate their own matrix memory space.  Most users should
2480    set data=NULL.
2481 
2482    Level: intermediate
2483 
2484 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2485 @*/
2486 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
2487 {
2488   PetscFunctionBegin;
2489   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2490   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
2491   PetscFunctionReturn(PETSC_SUCCESS);
2492 }
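/*
   A usage sketch for MatMPIDenseSetPreallocation(); the local sizes m and n are
   placeholders the caller would supply:

     Mat B;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetSizes(B, m, n, PETSC_DECIDE, PETSC_DECIDE));
     PetscCall(MatSetType(B, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(B, NULL));    NULL lets PETSc allocate the storage
*/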
2493 
2494 /*@
2495    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2496    array provided by the user. This is useful to avoid copying an array
2497    into a matrix
2498 
2499    Not Collective
2500 
2501    Input Parameters:
2502 +  mat - the matrix
2503 -  array - the array in column major order
2504 
2505    Note:
2506    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2507    freed when the matrix is destroyed.
2508 
2509    Level: developer
2510 
2511 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2512           `MatDenseReplaceArray()`
2513 @*/
2514 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
2515 {
2516   PetscFunctionBegin;
2517   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2518   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2519   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2520 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2521   mat->offloadmask = PETSC_OFFLOAD_CPU;
2522 #endif
2523   PetscFunctionReturn(PETSC_SUCCESS);
2524 }
2525 
2526 /*@
2527    MatDenseResetArray - Resets the matrix array to the array it had before the call to `MatDensePlaceArray()`
2528 
2529    Not Collective
2530 
2531    Input Parameter:
2532 .  mat - the matrix
2533 
2534    Note:
2535    You can only call this after a call to `MatDensePlaceArray()`
2536 
2537    Level: developer
2538 
2539 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2540 @*/
2541 PetscErrorCode MatDenseResetArray(Mat mat)
2542 {
2543   PetscFunctionBegin;
2544   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2545   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2546   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2547   PetscFunctionReturn(PETSC_SUCCESS);
2548 }
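/*
   The place/reset protocol as a sketch; myarray is a user-managed, column-major
   buffer sized for the local rows and global columns of mat:

     PetscCall(MatDensePlaceArray(mat, myarray));    mat now wraps myarray
     ... operate on mat ...
     PetscCall(MatDenseResetArray(mat));             mat returns to its original storage
     ... myarray is still owned, and eventually freed, by the caller ...
*/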
2549 
2550 /*@
2551    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2552    array provided by the user. This is useful to avoid copying an array
2553    into a matrix
2554 
2555    Not Collective
2556 
2557    Input Parameters:
2558 +  mat - the matrix
2559 -  array - the array in column major order
2560 
2561    Note:
2562    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2563    freed by the user. It will be freed when the matrix is destroyed.
2564 
2565    Level: developer
2566 
2567 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2568 @*/
2569 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2570 {
2571   PetscFunctionBegin;
2572   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2573   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2574   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2575 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2576   mat->offloadmask = PETSC_OFFLOAD_CPU;
2577 #endif
2578   PetscFunctionReturn(PETSC_SUCCESS);
2579 }
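/*
   A sketch for MatDenseReplaceArray(); lda and N are placeholders for the leading
   dimension and global column count, and the buffer MUST come from PetscMalloc()
   because the matrix frees it at destroy time:

     PetscScalar *newarr;
     PetscCall(PetscMalloc1(lda * N, &newarr));
     PetscCall(MatDenseReplaceArray(mat, newarr));    ownership passes to mat
*/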
2580 
2581 #if defined(PETSC_HAVE_CUDA)
2582 /*@C
2583    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2584    array provided by the user. This is useful to avoid copying an array
2585    into a matrix
2586 
2587    Not Collective
2588 
2589    Input Parameters:
2590 +  mat - the matrix
2591 -  array - the array in column major order
2592 
2593    Note:
2594    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2595    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2596 
2597    Level: developer
2598 
2599 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2600 @*/
2601 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2602 {
2603   PetscFunctionBegin;
2604   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2605   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2606   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2607   mat->offloadmask = PETSC_OFFLOAD_GPU;
2608   PetscFunctionReturn(PETSC_SUCCESS);
2609 }
2610 
2611 /*@C
2612    MatDenseCUDAResetArray - Resets the matrix array to the array it had before the call to `MatDenseCUDAPlaceArray()`
2613 
2614    Not Collective
2615 
2616    Input Parameter:
2617 .  mat - the matrix
2618 
2619    Note:
2620    You can only call this after a call to `MatDenseCUDAPlaceArray()`
2621 
2622    Level: developer
2623 
2624 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2625 @*/
2626 PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2627 {
2628   PetscFunctionBegin;
2629   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2630   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2631   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2632   PetscFunctionReturn(PETSC_SUCCESS);
2633 }
2634 
2635 /*@C
2636    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2637    array provided by the user. This is useful to avoid copying an array
2638    into a matrix
2639 
2640    Not Collective
2641 
2642    Input Parameters:
2643 +  mat - the matrix
2644 -  array - the array in column major order
2645 
2646    Note:
2647    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2648    The memory passed in CANNOT be freed by the user. It will be freed
2649    when the matrix is destroyed. The array should respect the matrix leading dimension.
2650 
2651    Level: developer
2652 
2653 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2654 @*/
2655 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2656 {
2657   PetscFunctionBegin;
2658   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2659   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2660   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2661   mat->offloadmask = PETSC_OFFLOAD_GPU;
2662   PetscFunctionReturn(PETSC_SUCCESS);
2663 }
2664 
2665 /*@C
2666    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2667 
2668    Not Collective
2669 
2670    Input Parameter:
2671 .  A - the matrix
2672 
2673    Output Parameter:
2674 .  array - the GPU array in column major order
2675 
2676    Notes:
2677    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
2678 
2679    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2680 
2681    Level: developer
2682 
2683 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2684 @*/
2685 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2686 {
2687   PetscFunctionBegin;
2688   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2689   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2690   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 /*@C
2695    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2696 
2697    Not Collective
2698 
2699    Input Parameters:
2700 +  A - the matrix
2701 -  array - the GPU array in column major order
2702 
2703    Level: developer
2704 
2705 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2706 @*/
2707 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2708 {
2709   PetscFunctionBegin;
2710   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2711   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2712   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2713   A->offloadmask = PETSC_OFFLOAD_GPU;
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
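/*
   A write-access sketch; fill_kernel and its launch configuration are hypothetical:

     PetscScalar *d_a;
     PetscCall(MatDenseCUDAGetArrayWrite(A, &d_a));        device pointer, column major
     fill_kernel<<<nblocks, nthreads>>>(d_a, m, n, lda);   hypothetical CUDA kernel
     PetscCall(MatDenseCUDARestoreArrayWrite(A, &d_a));    data now marked valid on the GPU
*/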
2716 
2717 /*@C
2718    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2719 
2720    Not Collective
2721 
2722    Input Parameter:
2723 .  A - the matrix
2724 
2725    Output Parameter:
2726 .  array - the GPU array in column major order
2727 
2728    Note:
2729    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`.
2730 
2731    Level: developer
2732 
2733 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2734 @*/
2735 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2736 {
2737   PetscFunctionBegin;
2738   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2739   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2740   PetscFunctionReturn(PETSC_SUCCESS);
2741 }
2742 
2743 /*@C
2744    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2745 
2746    Not Collective
2747 
2748    Input Parameters:
2749 +  A - the matrix
2750 -  array - the GPU array in column major order
2751 
2755    Level: developer
2756 
2757 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2758 @*/
2759 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2760 {
2761   PetscFunctionBegin;
2762   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2763   PetscFunctionReturn(PETSC_SUCCESS);
2764 }
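/*
   The matching read-access sketch; the kernel is hypothetical and must not write
   through d_a:

     const PetscScalar *d_a;
     PetscCall(MatDenseCUDAGetArrayRead(A, &d_a));
     dot_kernel<<<nblocks, nthreads>>>(d_a, m, n, lda);    hypothetical read-only kernel
     PetscCall(MatDenseCUDARestoreArrayRead(A, &d_a));
*/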
2765 
2766 /*@C
2767    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2768 
2769    Not Collective
2770 
2771    Input Parameter:
2772 .  A - the matrix
2773 
2774    Output Parameter:
2775 .  array - the GPU array in column major order
2776 
2777    Note:
2778    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2779 
2780    Level: developer
2781 
2782 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2783 @*/
2784 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2785 {
2786   PetscFunctionBegin;
2787   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2788   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2789   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2790   PetscFunctionReturn(PETSC_SUCCESS);
2791 }
2792 
2793 /*@C
2794    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2795 
2796    Not Collective
2797 
2798    Input Parameters:
2799 +  A - the matrix
2800 -  array - the GPU array in column major order
2801 
2802    Level: developer
2803 
2804 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2805 @*/
2806 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2807 {
2808   PetscFunctionBegin;
2809   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2810   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2811   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2812   A->offloadmask = PETSC_OFFLOAD_GPU;
2813   PetscFunctionReturn(PETSC_SUCCESS);
2814 }
2815 #endif
2816 
2817 #if defined(PETSC_HAVE_HIP)
2818 /*@C
2819    MatDenseHIPPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2820    array provided by the user. This is useful to avoid copying an array
2821    into a matrix
2822 
2823    Not Collective
2824 
2825    Input Parameters:
2826 +  mat - the matrix
2827 -  array - the array in column major order
2828 
2829    Note:
2830    You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be
2831    freed when the matrix is destroyed. The array must have been allocated with hipMalloc().
2832 
2833    Level: developer
2834 
2835 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPResetArray()`
2836 @*/
2837 PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
2838 {
2839   PetscFunctionBegin;
2840   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2841   PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2842   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2843   mat->offloadmask = PETSC_OFFLOAD_GPU;
2844   PetscFunctionReturn(PETSC_SUCCESS);
2845 }
2846 
2847 /*@C
2848    MatDenseHIPResetArray - Resets the matrix array to the array it had before the call to `MatDenseHIPPlaceArray()`
2849 
2850    Not Collective
2851 
2852    Input Parameter:
2853 .  mat - the matrix
2854 
2855    Note:
2856    You can only call this after a call to `MatDenseHIPPlaceArray()`
2857 
2858    Level: developer
2859 
2860 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`
2862 @*/
2863 PetscErrorCode MatDenseHIPResetArray(Mat mat)
2864 {
2865   PetscFunctionBegin;
2866   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2867   PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
2868   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2869   PetscFunctionReturn(PETSC_SUCCESS);
2870 }
2871 
2872 /*@C
2873    MatDenseHIPReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2874    array provided by the user. This is useful to avoid copying an array
2875    into a matrix
2876 
2877    Not Collective
2878 
2879    Input Parameters:
2880 +  mat - the matrix
2881 -  array - the array in column major order
2882 
2883    Notes:
2884    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2885    The memory passed in CANNOT be freed by the user. It will be freed
2886    when the matrix is destroyed. The array should respect the matrix leading dimension.
2887 
2888    Level: developer
2889 
2890 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`, `MatDenseHIPResetArray()`
2891 @*/
2892 PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
2893 {
2894   PetscFunctionBegin;
2895   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2896   PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2897   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2898   mat->offloadmask = PETSC_OFFLOAD_GPU;
2899   PetscFunctionReturn(PETSC_SUCCESS);
2900 }
2901 
2902 /*@C
2903    MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a dense matrix.
2904 
2905    Not Collective
2906 
2907    Input Parameter:
2908 .  A - the matrix
2909 
2910    Output Parameter:
2911 .  array - the GPU array in column major order
2912 
2913    Notes:
2914    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`. The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed.
2915 
2916    Level: developer
2917 
2918 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArrayRead()`
2919 @*/
2920 PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
2921 {
2922   PetscFunctionBegin;
2923   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2924   PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2925   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2926   PetscFunctionReturn(PETSC_SUCCESS);
2927 }
2928 
2929 /*@C
2930    MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a dense matrix previously obtained with `MatDenseHIPGetArrayWrite()`.
2931 
2932    Not Collective
2933 
2934    Input Parameters:
2935 +  A - the matrix
2936 -  array - the GPU array in column major order
2937 
2940    Level: developer
2941 
2942 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
2943 @*/
2944 PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
2945 {
2946   PetscFunctionBegin;
2947   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2948   PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2949   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2950   A->offloadmask = PETSC_OFFLOAD_GPU;
2951   PetscFunctionReturn(PETSC_SUCCESS);
2952 }
2953 
2954 /*@C
2955    MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with `MatDenseHIPRestoreArrayRead()` when no longer needed.
2956 
2957    Not Collective
2958 
2959    Input Parameter:
2960 .  A - the matrix
2961 
2962    Output Parameter:
2963 .  array - the GPU array in column major order
2964 
2965    Note:
2966    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`.
2967 
2968    Level: developer
2969 
2970 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
2971 @*/
2972 PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
2973 {
2974   PetscFunctionBegin;
2975   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2976   PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2977   PetscFunctionReturn(PETSC_SUCCESS);
2978 }
2979 
2980 /*@C
2981    MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a dense matrix previously obtained with a call to `MatDenseHIPGetArrayRead()`.
2982 
2983    Not Collective
2984 
2985    Input Parameters:
2986 +  A - the matrix
2987 -  array - the GPU array in column major order
2988 
2992    Level: developer
2993 
2994 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPGetArrayRead()`
2995 @*/
2996 PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
2997 {
2998   PetscFunctionBegin;
2999   PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
3000   PetscFunctionReturn(PETSC_SUCCESS);
3001 }
3002 
3003 /*@C
3004    MatDenseHIPGetArray - Provides access to the HIP buffer inside a dense matrix. The array must be restored with `MatDenseHIPRestoreArray()` when no longer needed.
3005 
3006    Not Collective
3007 
3008    Input Parameter:
3009 .  A - the matrix
3010 
3011    Output Parameter:
3012 .  array - the GPU array in column major order
3013 
3014    Note:
3015    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`. For read-only access, use `MatDenseHIPGetArrayRead()`.
3016 
3017    Level: developer
3018 
3019 .seealso: `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
3020 @*/
3021 PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
3022 {
3023   PetscFunctionBegin;
3024   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3025   PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
3026   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3027   PetscFunctionReturn(PETSC_SUCCESS);
3028 }
3029 
3030 /*@C
3031    MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a dense matrix previously obtained with `MatDenseHIPGetArray()`.
3032 
3033    Not Collective
3034 
3035    Input Parameters:
3036 +  A - the matrix
3037 -  array - the GPU array in column major order
3038 
3041    Level: developer
3042 
3043 .seealso: `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
3044 @*/
3045 PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
3046 {
3047   PetscFunctionBegin;
3048   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3049   PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
3050   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3051   A->offloadmask = PETSC_OFFLOAD_GPU;
3052   PetscFunctionReturn(PETSC_SUCCESS);
3053 }
3054 #endif
3055 
3056 /*@C
3057    MatCreateDense - Creates a matrix in `MATDENSE` format.
3058 
3059    Collective
3060 
3061    Input Parameters:
3062 +  comm - MPI communicator
3063 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3064 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3065 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
3066 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
3067 -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
3068    to control all matrix memory allocation.
3069 
3070    Output Parameter:
3071 .  A - the matrix
3072 
3073    Notes:
3074    The dense format is fully compatible with standard Fortran 77
3075    storage by columns.
3076 
3077    Although local portions of the matrix are stored in column-major
3078    order, the matrix is partitioned across MPI ranks by row.
3079 
3080    The data input variable is intended primarily for Fortran programmers
3081    who wish to allocate their own matrix memory space.  Most users should
3082    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
3083 
3084    The user MUST specify either the local or global matrix dimensions
3085    (possibly both).
3086 
3087    Level: intermediate
3088 
3089 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
3090 @*/
3091 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3092 {
3093   PetscFunctionBegin;
3094   PetscCall(MatCreate(comm, A));
3095   PetscCall(MatSetSizes(*A, m, n, M, N));
3096   PetscCall(MatSetType(*A, MATDENSE));
3097   PetscCall(MatSeqDenseSetPreallocation(*A, data));
3098   PetscCall(MatMPIDenseSetPreallocation(*A, data));
3099   PetscFunctionReturn(PETSC_SUCCESS);
3100 }
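/*
   A creation sketch: a 100 x 50 MATDENSE matrix with PETSc-managed storage; the
   sizes are illustrative:

     Mat A;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
     PetscCall(MatSetValue(A, 0, 0, 1.0, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/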
3101 
3102 #if defined(PETSC_HAVE_CUDA)
3103 /*@C
3104    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
3105 
3106    Collective
3107 
3108    Input Parameters:
3109 +  comm - MPI communicator
3110 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3111 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3112 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
3113 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
3114 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
3115    to control matrix memory allocation.
3116 
3117    Output Parameter:
3118 .  A - the matrix
3119 
3120    Level: intermediate
3121 
3122 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
3123 @*/
3124 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3125 {
3126   PetscFunctionBegin;
3127   PetscCall(MatCreate(comm, A));
3128   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3129   PetscCall(MatSetSizes(*A, m, n, M, N));
3130   PetscCall(MatSetType(*A, MATDENSECUDA));
3131   PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
3132   PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 #endif
3136 
3137 #if defined(PETSC_HAVE_HIP)
3138 /*@C
3139    MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.
3140 
3141    Collective
3142 
3143    Input Parameters:
3144 +  comm - MPI communicator
3145 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3146 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3147 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
3148 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
3149 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
3150    to control matrix memory allocation.
3151 
3152    Output Parameter:
3153 .  A - the matrix
3154 
3155    Level: intermediate
3156 
3157 .seealso: `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
3158 @*/
3159 PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3160 {
3161   PetscFunctionBegin;
3162   PetscCall(MatCreate(comm, A));
3163   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3164   PetscCall(MatSetSizes(*A, m, n, M, N));
3165   PetscCall(MatSetType(*A, MATDENSEHIP));
3166   PetscCall(MatSeqDenseHIPSetPreallocation(*A, data));
3167   PetscCall(MatMPIDenseHIPSetPreallocation(*A, data));
3168   PetscFunctionReturn(PETSC_SUCCESS);
3169 }
3170 #endif
3171 
3172 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
3173 {
3174   Mat           mat;
3175   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
3176 
3177   PetscFunctionBegin;
3178   *newmat = NULL;
3179   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
3180   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3181   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
3182   a = (Mat_MPIDense *)mat->data;
3183 
3184   mat->factortype   = A->factortype;
3185   mat->assembled    = PETSC_TRUE;
3186   mat->preallocated = PETSC_TRUE;
3187 
3188   mat->insertmode = NOT_SET_VALUES;
3189   a->donotstash   = oldmat->donotstash;
3190 
3191   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
3192   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
3193 
3194   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3195 
3196   *newmat = mat;
3197   PetscFunctionReturn(PETSC_SUCCESS);
3198 }
3199 
3200 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
3201 {
3202   PetscBool isbinary;
3203 #if defined(PETSC_HAVE_HDF5)
3204   PetscBool ishdf5;
3205 #endif
3206 
3207   PetscFunctionBegin;
3208   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3209   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3210   /* force binary viewer to load .info file if it has not yet done so */
3211   PetscCall(PetscViewerSetUp(viewer));
3212   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3213 #if defined(PETSC_HAVE_HDF5)
3214   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3215 #endif
3216   if (isbinary) {
3217     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
3218 #if defined(PETSC_HAVE_HDF5)
3219   } else if (ishdf5) {
3220     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
3221 #endif
3222   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3223   PetscFunctionReturn(PETSC_SUCCESS);
3224 }
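/*
   A load sketch for the binary path; the file name is illustrative:

     Mat         A;
     PetscViewer v;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &v));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatLoad(A, v));
     PetscCall(PetscViewerDestroy(&v));
*/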
3225 
3226 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
3227 {
3228   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
3229   Mat           a, b;
3230 
3231   PetscFunctionBegin;
3232   a = matA->A;
3233   b = matB->A;
3234   PetscCall(MatEqual(a, b, flag));
3235   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
3236   PetscFunctionReturn(PETSC_SUCCESS);
3237 }
3238 
3239 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
3240 {
3241   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
3242 
3243   PetscFunctionBegin;
3244   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
3245   PetscCall(MatDestroy(&atb->atb));
3246   PetscCall(PetscFree(atb));
3247   PetscFunctionReturn(PETSC_SUCCESS);
3248 }
3249 
3250 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
3251 {
3252   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
3253 
3254   PetscFunctionBegin;
3255   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
3256   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
3257   PetscCall(PetscFree(abt));
3258   PetscFunctionReturn(PETSC_SUCCESS);
3259 }
3260 
3261 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3262 {
3263   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3264   Mat_TransMatMultDense *atb;
3265   MPI_Comm               comm;
3266   PetscMPIInt            size, *recvcounts;
3267   PetscScalar           *carray, *sendbuf;
3268   const PetscScalar     *atbarray;
3269   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
3270   const PetscInt        *ranges;
3271 
3272   PetscFunctionBegin;
3273   MatCheckProduct(C, 3);
3274   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3275   atb        = (Mat_TransMatMultDense *)C->product->data;
3276   recvcounts = atb->recvcounts;
3277   sendbuf    = atb->sendbuf;
3278 
3279   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3280   PetscCallMPI(MPI_Comm_size(comm, &size));
3281 
3282   /* compute atbarray = aseq^T * bseq */
3283   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
3284 
3285   PetscCall(MatGetOwnershipRanges(C, &ranges));
3286 
3287   /* arrange atbarray into sendbuf */
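  /* rank p's block of rows of A^T*B is packed contiguously, column by column, so the
     MPI_Reduce_scatter() below delivers exactly recvcounts[p] summed entries to rank p */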
3288   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
3289   PetscCall(MatDenseGetLDA(atb->atb, &lda));
3290   for (proc = 0, k = 0; proc < size; proc++) {
3291     for (j = 0; j < cN; j++) {
3292       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
3293     }
3294   }
3295   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
3296 
3297   /* sum all atbarray to local values of C */
3298   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
3299   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
3300   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
3301   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3302   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3303   PetscFunctionReturn(PETSC_SUCCESS);
3304 }
3305 
3306 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3307 {
3308   MPI_Comm               comm;
3309   PetscMPIInt            size;
3310   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
3311   Mat_TransMatMultDense *atb;
3312   PetscBool              cisdense = PETSC_FALSE;
3313   PetscInt               i;
3314   const PetscInt        *ranges;
3315 
3316   PetscFunctionBegin;
3317   MatCheckProduct(C, 4);
3318   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3319   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3320   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
3321     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3322   }
3323 
3324   /* create matrix product C */
3325   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
3326 #if defined(PETSC_HAVE_CUDA)
3327   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
3328 #endif
3329 #if defined(PETSC_HAVE_HIP)
3330   if (!cisdense) PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); /* do not clobber a positive CUDA result when both backends are enabled */
3331 #endif
3332   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
3333   PetscCall(MatSetUp(C));
3334 
3335   /* create data structure for reuse of C */
3336   PetscCallMPI(MPI_Comm_size(comm, &size));
3337   PetscCall(PetscNew(&atb));
3338   cM = C->rmap->N;
3339   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
3340   PetscCall(MatGetOwnershipRanges(C, &ranges));
3341   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
3342 
3343   C->product->data    = atb;
3344   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
3345   PetscFunctionReturn(PETSC_SUCCESS);
3346 }
3347 
3348 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3349 {
3350   MPI_Comm               comm;
3351   PetscMPIInt            i, size;
3352   PetscInt               maxRows, bufsiz;
3353   PetscMPIInt            tag;
3354   PetscInt               alg;
3355   Mat_MatTransMultDense *abt;
3356   Mat_Product           *product = C->product;
3357   PetscBool              flg;
3358 
3359   PetscFunctionBegin;
3360   MatCheckProduct(C, 4);
3361   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3362   /* check local size of A and B */
3363   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
3364 
3365   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
3366   alg = flg ? 0 : 1;
3367 
3368   /* setup matrix product C */
3369   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
3370   PetscCall(MatSetType(C, MATMPIDENSE));
3371   PetscCall(MatSetUp(C));
3372   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
3373 
3374   /* create data structure for reuse of C */
3375   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3376   PetscCallMPI(MPI_Comm_size(comm, &size));
3377   PetscCall(PetscNew(&abt));
3378   abt->tag = tag;
3379   abt->alg = alg;
3380   switch (alg) {
3381   case 1: /* alg: "cyclic" */
3382     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
3383     bufsiz = A->cmap->N * maxRows;
3384     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
3385     break;
3386   default: /* alg: "allgatherv" */
3387     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
3388     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
3389     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
3390     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
3391     break;
3392   }
3393 
3394   C->product->data                = abt;
3395   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
3396   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
3397   PetscFunctionReturn(PETSC_SUCCESS);
3398 }
3399 
3400 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
3401 {
3402   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3403   Mat_MatTransMultDense *abt;
3404   MPI_Comm               comm;
3405   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
3406   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
3407   PetscInt               i, cK             = A->cmap->N, k, j, bn;
3408   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3409   const PetscScalar     *av, *bv;
3410   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
3411   MPI_Request            reqs[2];
3412   const PetscInt        *ranges;
3413 
3414   PetscFunctionBegin;
3415   MatCheckProduct(C, 3);
3416   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3417   abt = (Mat_MatTransMultDense *)C->product->data;
3418   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3419   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3420   PetscCallMPI(MPI_Comm_size(comm, &size));
3421   PetscCall(MatDenseGetArrayRead(a->A, &av));
3422   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3423   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3424   PetscCall(MatDenseGetLDA(a->A, &i));
3425   PetscCall(PetscBLASIntCast(i, &alda));
3426   PetscCall(MatDenseGetLDA(b->A, &i));
3427   PetscCall(PetscBLASIntCast(i, &blda));
3428   PetscCall(MatDenseGetLDA(c->A, &i));
3429   PetscCall(PetscBLASIntCast(i, &clda));
3430   PetscCall(MatGetOwnershipRanges(B, &ranges));
3431   bn = B->rmap->n;
3432   if (blda == bn) {
3433     sendbuf = (PetscScalar *)bv;
3434   } else {
3435     sendbuf = abt->buf[0];
3436     for (k = 0, i = 0; i < cK; i++) {
3437       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
3438     }
3439   }
3440   if (size > 1) {
3441     sendto   = (rank + size - 1) % size;
3442     recvfrom = (rank + size + 1) % size;
3443   } else {
3444     sendto = recvfrom = 0;
3445   }
3446   PetscCall(PetscBLASIntCast(cK, &ck));
3447   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3448   recvisfrom = rank;
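  /* ring schedule: at step i the local block of A is multiplied with the (transposed)
     rows of B that originate on rank (rank + i) % size, while the next block of B is
     already in flight around the ring */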
3449   for (i = 0; i < size; i++) {
3450     /* we have finished receiving and sending, so the buffers can be read/modified */
3451     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
3452     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
3453 
3454     if (nextrecvisfrom != rank) {
3455       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
3456       sendsiz = cK * bn;
3457       recvsiz = cK * nextbn;
3458       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
3459       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
3460       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
3461     }
3462 
3463     /* local aseq * sendbuf^T */
3464     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
3465     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
3466 
3467     if (nextrecvisfrom != rank) {
3468       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
3469       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
3470     }
3471     bn         = nextbn;
3472     recvisfrom = nextrecvisfrom;
3473     sendbuf    = recvbuf;
3474   }
3475   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3476   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3477   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3478   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3479   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3480   PetscFunctionReturn(PETSC_SUCCESS);
3481 }
3482 
3483 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
3484 {
3485   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3486   Mat_MatTransMultDense *abt;
3487   MPI_Comm               comm;
3488   PetscMPIInt            size;
3489   PetscScalar           *cv, *sendbuf, *recvbuf;
3490   const PetscScalar     *av, *bv;
3491   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
3492   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3493   PetscBLASInt           cm, cn, ck, alda, clda;
3494 
3495   PetscFunctionBegin;
3496   MatCheckProduct(C, 3);
3497   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3498   abt = (Mat_MatTransMultDense *)C->product->data;
3499   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3500   PetscCallMPI(MPI_Comm_size(comm, &size));
3501   PetscCall(MatDenseGetArrayRead(a->A, &av));
3502   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3503   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3504   PetscCall(MatDenseGetLDA(a->A, &i));
3505   PetscCall(PetscBLASIntCast(i, &alda));
3506   PetscCall(MatDenseGetLDA(b->A, &blda));
3507   PetscCall(MatDenseGetLDA(c->A, &i));
3508   PetscCall(PetscBLASIntCast(i, &clda));
3509   /* copy transpose of B into buf[0] */
3510   bn      = B->rmap->n;
3511   sendbuf = abt->buf[0];
3512   recvbuf = abt->buf[1];
3513   for (k = 0, j = 0; j < bn; j++) {
3514     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
3515   }
3516   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3517   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
3518   PetscCall(PetscBLASIntCast(cK, &ck));
3519   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3520   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
3521   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
3522   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3524   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3525   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3526   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3527   PetscFunctionReturn(PETSC_SUCCESS);
3528 }
3529 
3530 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3531 {
3532   Mat_MatTransMultDense *abt;
3533 
3534   PetscFunctionBegin;
3535   MatCheckProduct(C, 3);
3536   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3537   abt = (Mat_MatTransMultDense *)C->product->data;
3538   switch (abt->alg) {
3539   case 1:
3540     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
3541     break;
3542   default:
3543     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
3544     break;
3545   }
3546   PetscFunctionReturn(PETSC_SUCCESS);
3547 }
3548 
3549 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
3550 {
3551   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
3552 
3553   PetscFunctionBegin;
3554   PetscCall(MatDestroy(&ab->Ce));
3555   PetscCall(MatDestroy(&ab->Ae));
3556   PetscCall(MatDestroy(&ab->Be));
3557   PetscCall(PetscFree(ab));
3558   PetscFunctionReturn(PETSC_SUCCESS);
3559 }
3560 
3561 #if defined(PETSC_HAVE_ELEMENTAL)
3562 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3563 {
3564   Mat_MatMultDense *ab;
3565 
3566   PetscFunctionBegin;
3567   MatCheckProduct(C, 3);
3568   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
3569   ab = (Mat_MatMultDense *)C->product->data;
3570   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
3571   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
3572   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
3573   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
3574   PetscFunctionReturn(PETSC_SUCCESS);
3575 }
3576 
3577 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3578 {
3579   Mat               Ae, Be, Ce;
3580   Mat_MatMultDense *ab;
3581 
3582   PetscFunctionBegin;
3583   MatCheckProduct(C, 4);
3584   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3585   /* check local size of A and B */
3586   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
3587     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
3588   }
3589 
3590   /* create elemental matrices Ae and Be */
3591   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
3592   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
3593   PetscCall(MatSetType(Ae, MATELEMENTAL));
3594   PetscCall(MatSetUp(Ae));
3595   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
3596 
3597   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
3598   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
3599   PetscCall(MatSetType(Be, MATELEMENTAL));
3600   PetscCall(MatSetUp(Be));
3601   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
3602 
3603   /* compute symbolic Ce = Ae*Be */
3604   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
3605   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
3606 
3607   /* setup C */
3608   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3609   PetscCall(MatSetType(C, MATDENSE));
3610   PetscCall(MatSetUp(C));
3611 
3612   /* create data structure for reuse of the dense C */
3613   PetscCall(PetscNew(&ab));
3614   ab->Ae = Ae;
3615   ab->Be = Be;
3616   ab->Ce = Ce;
3617 
3618   C->product->data       = ab;
3619   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
3620   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3621   PetscFunctionReturn(PETSC_SUCCESS);
3622 }
3623 #endif
3624 /* ----------------------------------------------- */
3625 #if defined(PETSC_HAVE_ELEMENTAL)
3626 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3627 {
3628   PetscFunctionBegin;
3629   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3630   C->ops->productsymbolic = MatProductSymbolic_AB;
3631   PetscFunctionReturn(PETSC_SUCCESS);
3632 }
3633 #endif
3634 
3635 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3636 {
3637   Mat_Product *product = C->product;
3638   Mat          A = product->A, B = product->B;
3639 
3640   PetscFunctionBegin;
3641   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3642     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3643   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3644   C->ops->productsymbolic          = MatProductSymbolic_AtB;
3645   PetscFunctionReturn(PETSC_SUCCESS);
3646 }
3647 
3648 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3649 {
3650   Mat_Product *product     = C->product;
3651   const char  *algTypes[2] = {"allgatherv", "cyclic"};
3652   PetscInt     alg, nalg = 2;
3653   PetscBool    flg = PETSC_FALSE;
3654 
3655   PetscFunctionBegin;
3656   /* Set default algorithm */
3657   alg = 0; /* default is allgatherv */
3658   PetscCall(PetscStrcmp(product->alg, "default", &flg));
3659   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3660 
3661   /* Get runtime option */
3662   if (product->api_user) {
3663     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
3664     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
3665     PetscOptionsEnd();
3666   } else {
3667     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
3668     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
3669     PetscOptionsEnd();
3670   }
3671   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3672 
3673   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3674   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3675   PetscFunctionReturn(PETSC_SUCCESS);
3676 }
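/*
   Selecting the C = A*B^T algorithm at run time, a sketch of the two option paths
   registered above:

     -matmattransmult_mpidense_mpidense_via cyclic    (MatMatTransposeMult() interface)
     -mat_product_algorithm cyclic                    (MatProduct interface)
*/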
3677 
3678 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3679 {
3680   Mat_Product *product = C->product;
3681 
3682   PetscFunctionBegin;
3683   switch (product->type) {
3684 #if defined(PETSC_HAVE_ELEMENTAL)
3685   case MATPRODUCT_AB:
3686     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
3687     break;
3688 #endif
3689   case MATPRODUCT_AtB:
3690     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3691     break;
3692   case MATPRODUCT_ABt:
3693     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3694     break;
3695   default:
3696     break;
3697   }
3698   PetscFunctionReturn(PETSC_SUCCESS);
3699 }
3700