xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4    Portions of this code are under:
5    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
6 */
7 
8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
9 #include <../src/mat/impls/aij/mpi/mpiaij.h>
10 #include <petscblaslapack.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
14   matrix that stores the locally owned rows of the operator. For a sequential matrix it returns the matrix itself.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22     Level: intermediate
23 
24 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscValidPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
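
/*
  A minimal usage sketch for MatDenseGetLocalMatrix(), with illustrative names: A is assumed
  to be an already assembled MATMPIDENSE matrix. The returned Aloc is the inner MATSEQDENSE
  (local rows of A by global columns of A), a borrowed reference that must not be destroyed.

    Mat      Aloc;
    PetscInt lm, ln;

    PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
    PetscCall(MatGetSize(Aloc, &lm, &ln));
*/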
43 
44 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
97 {
98   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
99   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
100   PetscScalar  *array;
101   MPI_Comm      comm;
102   PetscBool     flg;
103   Mat           B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatHasCongruentLayouts(A, &flg));
107   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
108   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
109   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
110 #if defined(PETSC_HAVE_CUDA)
111     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
112     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
113 #elif defined(PETSC_HAVE_HIP)
114     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
115     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
116 #endif
117     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
118     PetscCall(MatCreate(comm, &B));
119     PetscCall(MatSetSizes(B, m, m, m, m));
120     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
121     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
122     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
123     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
124     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
125     *a = B;
126     PetscCall(MatDestroy(&B));
127   } else *a = B;
128   PetscFunctionReturn(PETSC_SUCCESS);
129 }
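
/*
  A minimal sketch of retrieving this block through the public interface, assuming a square,
  assembled MATMPIDENSE matrix A (an illustrative name). The block is cached on A under the
  "DiagonalBlock" key above, so repeated calls return the same sequential matrix and the
  caller must not destroy it.

    Mat Ad;

    PetscCall(MatGetDiagonalBlock(A, &Ad));
    PetscCall(MatView(Ad, PETSC_VIEWER_STDOUT_SELF));
*/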
130 
131 PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
132 {
133   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
134   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
135   PetscBool     roworiented = A->roworiented;
136 
137   PetscFunctionBegin;
138   for (i = 0; i < m; i++) {
139     if (idxm[i] < 0) continue;
140     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
141     if (idxm[i] >= rstart && idxm[i] < rend) {
142       row = idxm[i] - rstart;
143       if (roworiented) {
144         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
145       } else {
146         for (j = 0; j < n; j++) {
147           if (idxn[j] < 0) continue;
148           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
149           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
150         }
151       }
152     } else if (!A->donotstash) {
153       mat->assembled = PETSC_FALSE;
154       if (roworiented) {
155         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
156       } else {
157         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
158       }
159     }
160   }
161   PetscFunctionReturn(PETSC_SUCCESS);
162 }
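
/*
  A minimal sketch of the insertion protocol this routine implements, assuming an
  illustrative MATMPIDENSE matrix A on PETSC_COMM_WORLD: entries for rows owned by other
  ranks are placed in the stash here and are communicated by the assembly routines below.

    PetscInt    i = 0, j = 3;
    PetscScalar v = 2.0;

    PetscCall(MatSetValues(A, 1, &i, 1, &j, &v, INSERT_VALUES));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/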
163 
164 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195 {
196   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
197   MatType       mtype = MATSEQDENSE;
198 
199   PetscFunctionBegin;
200   if (!a->A) {
201     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202     PetscCall(PetscLayoutSetUp(A->rmap));
203     PetscCall(PetscLayoutSetUp(A->cmap));
204     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206 #if defined(PETSC_HAVE_CUDA)
207     PetscBool iscuda;
208     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209     if (iscuda) mtype = MATSEQDENSECUDA;
210 #elif defined(PETSC_HAVE_HIP)
211     PetscBool iship;
212     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213     if (iship) mtype = MATSEQDENSEHIP;
214 #endif
215     PetscCall(MatSetType(a->A, mtype));
216   }
217   PetscCall(MatDenseSetLDA(a->A, lda));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285 {
286   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
287   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288   const PetscInt    *irow, *icol;
289   const PetscScalar *v;
290   PetscScalar       *bv;
291   Mat                newmat;
292   IS                 iscol_local;
293   MPI_Comm           comm_is, comm_mat;
294 
295   PetscFunctionBegin;
296   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
299 
300   PetscCall(ISAllGather(iscol, &iscol_local));
301   PetscCall(ISGetIndices(isrow, &irow));
302   PetscCall(ISGetIndices(iscol_local, &icol));
303   PetscCall(ISGetLocalSize(isrow, &nrows));
304   PetscCall(ISGetLocalSize(iscol, &ncols));
305   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
306 
307   /* No parallel redistribution is currently supported; each index set should really be checked
308      to confirm that it is compatible. The submatrix must currently use the same row partitioning
309      as the original matrix. */
310 
311   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
313 
314   /* Check submatrix call */
315   if (scall == MAT_REUSE_MATRIX) {
316     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
317     /* Really need to test rows and column sizes! */
318     newmat = *B;
319   } else {
320     /* Create and fill new matrix */
321     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325   }
326 
327   /* Now extract the data pointers and do the copy, column at a time */
328   newmatd = (Mat_MPIDense *)newmat->data;
329   PetscCall(MatDenseGetArray(newmatd->A, &bv));
330   PetscCall(MatDenseGetArrayRead(mat->A, &v));
331   PetscCall(MatDenseGetLDA(mat->A, &lda));
332   for (i = 0; i < Ncols; i++) {
333     const PetscScalar *av = v + lda * icol[i];
334     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335   }
336   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
338 
339   /* Assemble the matrices so that the correct flags are set */
340   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
342 
343   /* Free work space */
344   PetscCall(ISRestoreIndices(isrow, &irow));
345   PetscCall(ISRestoreIndices(iscol_local, &icol));
346   PetscCall(ISDestroy(&iscol_local));
347   *B = newmat;
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
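
/*
  A minimal sketch of calling this through MatCreateSubMatrix(), assuming an assembled
  MATMPIDENSE matrix A; as noted above, the row index set must follow A's row partitioning
  (no redistribution), while the column index set lists the columns each rank will own in
  the result and is gathered onto every rank internally. The names A, B, ncols_local and
  first_col are illustrative placeholders.

    IS  isrow, iscol;
    Mat B;

    PetscCall(MatGetOwnershipIS(A, &isrow, NULL));
    PetscCall(ISCreateStride(PETSC_COMM_WORLD, ncols_local, first_col, 1, &iscol));
    PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &B));
    PetscCall(ISDestroy(&isrow));
    PetscCall(ISDestroy(&iscol));
*/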
350 
351 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
395   PetscInt      i, *row, *col, flg, j, rstart, ncols;
396   PetscMPIInt   n;
397   PetscScalar  *val;
398 
399   PetscFunctionBegin;
400   if (!mdn->donotstash && !mat->nooffprocentries) {
401     /*  wait on receives */
402     while (1) {
403       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
404       if (!flg) break;
405 
406       for (i = 0; i < n;) {
407         /* Now identify the consecutive vals belonging to the same row */
408         for (j = i, rstart = row[j]; j < n; j++) {
409           if (row[j] != rstart) break;
410         }
411         if (j < n) ncols = j - i;
412         else ncols = n - i;
413         /* Now assemble all these values with a single function call */
414         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
415         i = j;
416       }
417     }
418     PetscCall(MatStashScatterEnd_Private(&mat->stash));
419   }
420 
421   PetscCall(MatAssemblyBegin(mdn->A, mode));
422   PetscCall(MatAssemblyEnd(mdn->A, mode));
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436 {
437   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438   PetscInt      i, len, *lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443   /* fix right hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     PetscCall(VecGetArrayRead(x, &xx));
449     PetscCall(VecGetArrayWrite(b, &bb));
450     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451     PetscCall(VecRestoreArrayRead(x, &xx));
452     PetscCall(VecRestoreArrayWrite(b, &bb));
453   }
454   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455   if (diag != 0.0) {
456     Vec d;
457 
458     PetscCall(MatCreateVecs(A, NULL, &d));
459     PetscCall(VecSet(d, diag));
460     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461     PetscCall(VecDestroy(&d));
462   }
463   PetscCall(PetscFree(lrows));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
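
/*
  A minimal sketch of the path above, assuming an assembled MATMPIDENSE matrix A and vectors
  x (known solution values) and b (right-hand side) obtained from MatCreateVecs(); names and
  indices are illustrative. The listed rows are zeroed, diag is placed on their diagonal
  entries, and b is overwritten with diag*x in those rows.

    PetscInt    rows[] = {0, 5};
    PetscScalar diag   = 1.0;

    PetscCall(MatZeroRows(A, 2, rows, diag, x, b));
*/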
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
472 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475   const PetscScalar *ax;
476   PetscScalar       *ay;
477   PetscMemType       axmtype, aymtype;
478 
479   PetscFunctionBegin;
480   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
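
/*
  A minimal sketch of applying the operator, with an illustrative assembled MATMPIDENSE
  matrix A: the routine above broadcasts the distributed input vector into the local work
  vector lvec through the PetscSF Mvctx and then calls the sequential multiply kernel.

    Vec x, y;

    PetscCall(MatCreateVecs(A, &x, &y));
    PetscCall(VecSet(x, 1.0));
    PetscCall(MatMult(A, x, y));
    PetscCall(VecDestroy(&x));
    PetscCall(VecDestroy(&y));
*/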
490 
491 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494   const PetscScalar *ax;
495   PetscScalar       *ay;
496   PetscMemType       axmtype, aymtype;
497 
498   PetscFunctionBegin;
499   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
511 {
512   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
513   const PetscScalar *ax;
514   PetscScalar       *ay;
515   PetscMemType       axmtype, aymtype;
516 
517   PetscFunctionBegin;
518   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
519   PetscCall(VecSet(yy, 0.0));
520   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
521   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
522   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
523   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
524   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
525   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
526   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
531 {
532   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
533   const PetscScalar *ax;
534   PetscScalar       *ay;
535   PetscMemType       axmtype, aymtype;
536 
537   PetscFunctionBegin;
538   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
539   PetscCall(VecCopy(yy, zz));
540   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
541   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
542   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
543   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
544   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
545   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
546   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551 {
552   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
554   PetscScalar       *x, zero = 0.0;
555   const PetscScalar *av;
556 
557   PetscFunctionBegin;
558   PetscCall(VecSet(v, zero));
559   PetscCall(VecGetArray(v, &x));
560   PetscCall(VecGetSize(v, &n));
561   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
562   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
563   radd = A->rmap->rstart * m;
564   PetscCall(MatDenseGetArrayRead(a->A, &av));
565   PetscCall(MatDenseGetLDA(a->A, &lda));
566   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568   PetscCall(VecRestoreArray(v, &x));
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
572 PetscErrorCode MatDestroy_MPIDense(Mat mat)
573 {
574   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
575 
576   PetscFunctionBegin;
577 #if defined(PETSC_USE_LOG)
578   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
579 #endif
580   PetscCall(MatStashDestroy_Private(&mat->stash));
581   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
582   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
583   PetscCall(MatDestroy(&mdn->A));
584   PetscCall(VecDestroy(&mdn->lvec));
585   PetscCall(PetscSFDestroy(&mdn->Mvctx));
586   PetscCall(VecDestroy(&mdn->cvec));
587   PetscCall(MatDestroy(&mdn->cmat));
588 
589   PetscCall(PetscFree(mat->data));
590   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
591 
592   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
593   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
594   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
596   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
597   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
598   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
599   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
600   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
601   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
602   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
603   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
604   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
605 #if defined(PETSC_HAVE_ELEMENTAL)
606   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
607 #endif
608 #if defined(PETSC_HAVE_SCALAPACK)
609   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
610 #endif
611   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
612   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
613   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
614 #if defined(PETSC_HAVE_CUDA)
615   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
616   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
617   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
618   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
619   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
620   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
621   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
622   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
623   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
624   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
625   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
626   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
627   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
628   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
629   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
630   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
631   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
632 #endif
633 #if defined(PETSC_HAVE_HIP)
634   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
635   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
636   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
637   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
638   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
639   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
640   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
641   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
642   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
643   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
644   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
645   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
646   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
647   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
648   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
649   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
650   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
651 #endif
652   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
662 
663   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664   PetscFunctionReturn(PETSC_SUCCESS);
665 }
666 
667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
668 
669 #include <petscdraw.h>
670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671 {
672   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
673   PetscMPIInt       rank;
674   PetscViewerType   vtype;
675   PetscBool         iascii, isdraw;
676   PetscViewer       sviewer;
677   PetscViewerFormat format;
678 
679   PetscFunctionBegin;
680   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683   if (iascii) {
684     PetscCall(PetscViewerGetType(viewer, &vtype));
685     PetscCall(PetscViewerGetFormat(viewer, &format));
686     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687       MatInfo info;
688       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691                                                    (PetscInt)info.memory));
692       PetscCall(PetscViewerFlush(viewer));
693       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695       PetscFunctionReturn(PETSC_SUCCESS);
696     } else if (format == PETSC_VIEWER_ASCII_INFO) {
697       PetscFunctionReturn(PETSC_SUCCESS);
698     }
699   } else if (isdraw) {
700     PetscDraw draw;
701     PetscBool isnull;
702 
703     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704     PetscCall(PetscDrawIsNull(draw, &isnull));
705     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706   }
707 
708   {
709     /* assemble the entire matrix onto first processor. */
710     Mat          A;
711     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712     PetscInt    *cols;
713     PetscScalar *vals;
714 
715     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716     if (rank == 0) {
717       PetscCall(MatSetSizes(A, M, N, M, N));
718     } else {
719       PetscCall(MatSetSizes(A, 0, 0, M, N));
720     }
721     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722     PetscCall(MatSetType(A, MATMPIDENSE));
723     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
724 
725     /* Copy the matrix. This is not the most efficient approach,
726        but it is adequate for viewing */
727     A->insertmode = INSERT_VALUES;
728 
729     row = mat->rmap->rstart;
730     m   = mdn->A->rmap->n;
731     for (i = 0; i < m; i++) {
732       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735       row++;
736     }
737 
738     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741     if (rank == 0) {
742       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744     }
745     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746     PetscCall(PetscViewerFlush(viewer));
747     PetscCall(MatDestroy(&A));
748   }
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753 {
754   PetscBool iascii, isbinary, isdraw, issocket;
755 
756   PetscFunctionBegin;
757   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
761 
762   if (iascii || issocket || isdraw) {
763     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765   PetscFunctionReturn(PETSC_SUCCESS);
766 }
767 
768 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769 {
770   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
771   Mat            mdn = mat->A;
772   PetscLogDouble isend[5], irecv[5];
773 
774   PetscFunctionBegin;
775   info->block_size = 1.0;
776 
777   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
778 
779   isend[0] = info->nz_used;
780   isend[1] = info->nz_allocated;
781   isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;
783   isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   } else if (flag == MAT_GLOBAL_SUM) {
799     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
800 
801     info->nz_used      = irecv[0];
802     info->nz_allocated = irecv[1];
803     info->nz_unneeded  = irecv[2];
804     info->memory       = irecv[3];
805     info->mallocs      = irecv[4];
806   }
807   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
808   info->fill_ratio_needed = 0;
809   info->factor_mallocs    = 0;
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
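
/*
  A minimal sketch of querying the statistics gathered above, assuming an illustrative
  assembled MATMPIDENSE matrix A: MAT_LOCAL reports this rank only, while MAT_GLOBAL_SUM
  and MAT_GLOBAL_MAX reduce the per-rank numbers with MPI_SUM and MPI_MAX respectively.

    MatInfo info;

    PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "total nonzeros used: %g\n", info.nz_used));
*/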
812 
813 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814 {
815   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     MatCheckPreallocated(A, 1);
823     PetscCall(MatSetOption(a->A, op, flg));
824     break;
825   case MAT_ROW_ORIENTED:
826     MatCheckPreallocated(A, 1);
827     a->roworiented = flg;
828     PetscCall(MatSetOption(a->A, op, flg));
829     break;
830   case MAT_FORCE_DIAGONAL_ENTRIES:
831   case MAT_KEEP_NONZERO_PATTERN:
832   case MAT_USE_HASH_TABLE:
833   case MAT_SORTED_FULL:
834     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835     break;
836   case MAT_IGNORE_OFF_PROC_ENTRIES:
837     a->donotstash = flg;
838     break;
839   case MAT_SYMMETRIC:
840   case MAT_STRUCTURALLY_SYMMETRIC:
841   case MAT_HERMITIAN:
842   case MAT_SYMMETRY_ETERNAL:
843   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844   case MAT_SPD:
845   case MAT_IGNORE_LOWER_TRIANGULAR:
846   case MAT_IGNORE_ZERO_ENTRIES:
847   case MAT_SPD_ETERNAL:
848     /* if the diagonal matrix is square it inherits some of the properties above */
849     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850     break;
851   default:
852     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853   }
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858 {
859   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
860   const PetscScalar *l;
861   PetscScalar        x, *v, *vv, *r;
862   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
863 
864   PetscFunctionBegin;
865   PetscCall(MatDenseGetArray(mdn->A, &vv));
866   PetscCall(MatDenseGetLDA(mdn->A, &lda));
867   PetscCall(MatGetLocalSize(A, &s2, &s3));
868   if (ll) {
869     PetscCall(VecGetLocalSize(ll, &s2a));
870     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871     PetscCall(VecGetArrayRead(ll, &l));
872     for (i = 0; i < m; i++) {
873       x = l[i];
874       v = vv + i;
875       for (j = 0; j < n; j++) {
876         (*v) *= x;
877         v += lda;
878       }
879     }
880     PetscCall(VecRestoreArrayRead(ll, &l));
881     PetscCall(PetscLogFlops(1.0 * n * m));
882   }
883   if (rr) {
884     const PetscScalar *ar;
885 
886     PetscCall(VecGetLocalSize(rr, &s3a));
887     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
888     PetscCall(VecGetArrayRead(rr, &ar));
889     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890     PetscCall(VecGetArray(mdn->lvec, &r));
891     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893     PetscCall(VecRestoreArrayRead(rr, &ar));
894     for (i = 0; i < n; i++) {
895       x = r[i];
896       v = vv + i * lda;
897       for (j = 0; j < m; j++) (*v++) *= x;
898     }
899     PetscCall(VecRestoreArray(mdn->lvec, &r));
900     PetscCall(PetscLogFlops(1.0 * n * m));
901   }
902   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903   PetscFunctionReturn(PETSC_SUCCESS);
904 }
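
/*
  A minimal sketch of the scaling above, with illustrative names: A is an assembled
  MATMPIDENSE matrix, and l and r are created with MatCreateVecs() so that l follows the
  row layout (scales rows in place) and r follows the column layout (broadcast into lvec
  before the columns are scaled).

    Vec l, r;

    PetscCall(MatCreateVecs(A, &r, &l));
    PetscCall(VecSet(l, 2.0));
    PetscCall(VecSet(r, 0.5));
    PetscCall(MatDiagonalScale(A, l, r));
    PetscCall(VecDestroy(&l));
    PetscCall(VecDestroy(&r));
*/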
905 
906 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907 {
908   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
909   PetscInt           i, j;
910   PetscMPIInt        size;
911   PetscReal          sum = 0.0;
912   const PetscScalar *av, *v;
913 
914   PetscFunctionBegin;
915   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916   v = av;
917   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918   if (size == 1) {
919     PetscCall(MatNorm(mdn->A, type, nrm));
920   } else {
921     if (type == NORM_FROBENIUS) {
922       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923         sum += PetscRealPart(PetscConj(*v) * (*v));
924         v++;
925       }
926       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927       *nrm = PetscSqrtReal(*nrm);
928       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929     } else if (type == NORM_1) {
930       PetscReal *tmp, *tmp2;
931       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932       *nrm = 0.0;
933       v    = av;
934       for (j = 0; j < mdn->A->cmap->n; j++) {
935         for (i = 0; i < mdn->A->rmap->n; i++) {
936           tmp[j] += PetscAbsScalar(*v);
937           v++;
938         }
939       }
940       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941       for (j = 0; j < A->cmap->N; j++) {
942         if (tmp2[j] > *nrm) *nrm = tmp2[j];
943       }
944       PetscCall(PetscFree2(tmp, tmp2));
945       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946     } else if (type == NORM_INFINITY) { /* max row norm */
947       PetscReal ntemp;
948       PetscCall(MatNorm(mdn->A, type, &ntemp));
949       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951   }
952   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953   PetscFunctionReturn(PETSC_SUCCESS);
954 }
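
/*
  A minimal sketch of the norms handled above, assuming an illustrative assembled MATMPIDENSE
  matrix A: NORM_FROBENIUS sums |a_ij|^2 locally and reduces with MPI_SUM, NORM_1 reduces the
  per-column absolute sums, and NORM_INFINITY takes the maximum of the local row-sum norms.

    PetscReal nrm1, nrmF, nrmI;

    PetscCall(MatNorm(A, NORM_1, &nrm1));
    PetscCall(MatNorm(A, NORM_FROBENIUS, &nrmF));
    PetscCall(MatNorm(A, NORM_INFINITY, &nrmI));
*/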
955 
956 PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957 {
958   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959   Mat           B;
960   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961   PetscInt      j, i, lda;
962   PetscScalar  *v;
963 
964   PetscFunctionBegin;
965   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971   } else B = *matout;
972 
973   m = a->A->rmap->n;
974   n = a->A->cmap->n;
975   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976   PetscCall(MatDenseGetLDA(a->A, &lda));
977   PetscCall(PetscMalloc1(m, &rwork));
978   for (i = 0; i < m; i++) rwork[i] = rstart + i;
979   for (j = 0; j < n; j++) {
980     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981     v += lda;
982   }
983   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984   PetscCall(PetscFree(rwork));
985   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988     *matout = B;
989   } else {
990     PetscCall(MatHeaderMerge(A, &B));
991   }
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
997 
998 PetscErrorCode MatSetUp_MPIDense(Mat A)
999 {
1000   PetscFunctionBegin;
1001   PetscCall(PetscLayoutSetUp(A->rmap));
1002   PetscCall(PetscLayoutSetUp(A->cmap));
1003   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008 {
1009   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1010 
1011   PetscFunctionBegin;
1012   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017 {
1018   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1019 
1020   PetscFunctionBegin;
1021   PetscCall(MatConjugate(a->A));
1022   PetscFunctionReturn(PETSC_SUCCESS);
1023 }
1024 
1025 PetscErrorCode MatRealPart_MPIDense(Mat A)
1026 {
1027   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(MatRealPart(a->A));
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033 
1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035 {
1036   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(MatImaginaryPart(a->A));
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042 
1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044 {
1045   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1046 
1047   PetscFunctionBegin;
1048   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051   PetscFunctionReturn(PETSC_SUCCESS);
1052 }
1053 
1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1055 
1056 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057 {
1058   PetscInt      i, m, n;
1059   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060   PetscReal    *work;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatGetSize(A, &m, &n));
1064   PetscCall(PetscMalloc1(n, &work));
1065   if (type == REDUCTION_MEAN_REALPART) {
1066     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069   } else {
1070     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071   }
1072   if (type == NORM_2) {
1073     for (i = 0; i < n; i++) work[i] *= work[i];
1074   }
1075   if (type == NORM_INFINITY) {
1076     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077   } else {
1078     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079   }
1080   PetscCall(PetscFree(work));
1081   if (type == NORM_2) {
1082     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084     for (i = 0; i < n; i++) reductions[i] /= m;
1085   }
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
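
/*
  A minimal sketch of reaching the reduction above through a public entry point, assuming an
  illustrative assembled MATMPIDENSE matrix A with N global columns; the output array must
  hold N reals. For NORM_2 the local column sums of squares are reduced with MPI_SUM and
  square-rooted afterwards, and the mean reductions divide the global sums by the row count.

    PetscReal *norms;
    PetscInt   N;

    PetscCall(MatGetSize(A, NULL, &N));
    PetscCall(PetscMalloc1(N, &norms));
    PetscCall(MatGetColumnNorms(A, NORM_2, norms));
    PetscCall(PetscFree(norms));
*/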
1088 
1089 #if defined(PETSC_HAVE_CUDA)
1090 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
1091 {
1092   PetscScalar *da;
1093   PetscInt     lda;
1094 
1095   PetscFunctionBegin;
1096   PetscCall(MatDenseCUDAGetArray(A, &da));
1097   PetscCall(MatDenseGetLDA(A, &lda));
1098   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1099   PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1100   PetscCall(MatDenseCUDARestoreArray(A, &da));
1101   PetscFunctionReturn(PETSC_SUCCESS);
1102 }
1103 
1104 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1105 {
1106   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1107   PetscInt      lda;
1108 
1109   PetscFunctionBegin;
1110   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1111   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1112   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1113   a->vecinuse = col + 1;
1114   PetscCall(MatDenseGetLDA(a->A, &lda));
1115   PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1116   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1117   *v = a->cvec;
1118   PetscFunctionReturn(PETSC_SUCCESS);
1119 }
1120 
1121 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1122 {
1123   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1124 
1125   PetscFunctionBegin;
1126   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1127   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1128   a->vecinuse = 0;
1129   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1130   PetscCall(VecCUDAResetArray(a->cvec));
1131   if (v) *v = NULL;
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1136 {
1137   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1138   PetscInt      lda;
1139 
1140   PetscFunctionBegin;
1141   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1142   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1143   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1144   a->vecinuse = col + 1;
1145   PetscCall(MatDenseGetLDA(a->A, &lda));
1146   PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
1147   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1148   PetscCall(VecLockReadPush(a->cvec));
1149   *v = a->cvec;
1150   PetscFunctionReturn(PETSC_SUCCESS);
1151 }
1152 
1153 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1154 {
1155   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1156 
1157   PetscFunctionBegin;
1158   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1159   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1160   a->vecinuse = 0;
1161   PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
1162   PetscCall(VecLockReadPop(a->cvec));
1163   PetscCall(VecCUDAResetArray(a->cvec));
1164   if (v) *v = NULL;
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1169 {
1170   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1171   PetscInt      lda;
1172 
1173   PetscFunctionBegin;
1174   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1175   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1176   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1177   a->vecinuse = col + 1;
1178   PetscCall(MatDenseGetLDA(a->A, &lda));
1179   PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1180   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1181   *v = a->cvec;
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1186 {
1187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1188 
1189   PetscFunctionBegin;
1190   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1191   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1192   a->vecinuse = 0;
1193   PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1194   PetscCall(VecCUDAResetArray(a->cvec));
1195   if (v) *v = NULL;
1196   PetscFunctionReturn(PETSC_SUCCESS);
1197 }
1198 
1199 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1200 {
1201   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1202 
1203   PetscFunctionBegin;
1204   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1205   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1206   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1207   PetscFunctionReturn(PETSC_SUCCESS);
1208 }
1209 
1210 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1211 {
1212   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1213 
1214   PetscFunctionBegin;
1215   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1216   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1217   PetscCall(MatDenseCUDAResetArray(l->A));
1218   PetscFunctionReturn(PETSC_SUCCESS);
1219 }
1220 
1221 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1222 {
1223   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1224 
1225   PetscFunctionBegin;
1226   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1227   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1228   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1229   PetscFunctionReturn(PETSC_SUCCESS);
1230 }
1231 
1232 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1233 {
1234   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1235 
1236   PetscFunctionBegin;
1237   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1238   PetscFunctionReturn(PETSC_SUCCESS);
1239 }
1240 
1241 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1242 {
1243   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1244 
1245   PetscFunctionBegin;
1246   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1247   PetscFunctionReturn(PETSC_SUCCESS);
1248 }
1249 
1250 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1251 {
1252   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1253 
1254   PetscFunctionBegin;
1255   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1256   PetscFunctionReturn(PETSC_SUCCESS);
1257 }
1258 
1259 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1260 {
1261   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1262 
1263   PetscFunctionBegin;
1264   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1265   PetscFunctionReturn(PETSC_SUCCESS);
1266 }
1267 
1268 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1269 {
1270   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1271 
1272   PetscFunctionBegin;
1273   PetscCall(MatDenseCUDAGetArray(l->A, a));
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276 
1277 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1278 {
1279   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1280 
1281   PetscFunctionBegin;
1282   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1283   PetscFunctionReturn(PETSC_SUCCESS);
1284 }
1285 
1286 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1287 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1288 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1289 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1290 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1291 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1292 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1293 
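/*
  Bind or unbind a MATMPIDENSECUDA matrix to the CPU. Unbinding (bind == PETSC_FALSE) switches the
  composed column-vector accessors and the MatShift() implementation to the CUDA variants, sets the
  default random type to curand, and drops any cached column vector or sub-matrix that is not of a
  CUDA type; binding restores the host (MPIDense) variants. The inner matrix and any cached
  sub-matrix are rebound as well.
*/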
1294 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
1295 {
1296   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1297 
1298   PetscFunctionBegin;
1299   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1300   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1301   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1302   mat->boundtocpu = bind;
1303   if (!bind) {
1304     PetscBool iscuda;
1305 
1306     PetscCall(PetscFree(mat->defaultrandtype));
1307     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1308     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
1309     if (!iscuda) PetscCall(VecDestroy(&d->cvec));
1310     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
1311     if (!iscuda) PetscCall(MatDestroy(&d->cmat));
1312     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
1313     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
1314     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
1315     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1316     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
1317     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1318     mat->ops->shift = MatShift_MPIDenseCUDA;
1319   } else {
1320     PetscCall(PetscFree(mat->defaultrandtype));
1321     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1322     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1323     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1324     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1325     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1326     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1327     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1328     mat->ops->shift = MatShift_MPIDense;
1329   }
1330   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1331   PetscFunctionReturn(PETSC_SUCCESS);
1332 }
1333 
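/*
  Preallocate the inner MATSEQDENSECUDA matrix of a MATMPIDENSECUDA matrix. Each rank stores its
  rmap->n x cmap->N local block; d_data, if non-NULL, is a caller-provided device array used as
  that storage (pass NULL to have PETSc allocate it). A minimal usage sketch, where A, m, n, and
  the device pointer d_data are assumed to be provided by the caller (e.g. d_data from cudaMalloc()):

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetType(A, MATMPIDENSECUDA));
    PetscCall(MatMPIDenseCUDASetPreallocation(A, d_data));
*/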
1334 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1335 {
1336   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1337   PetscBool     iscuda;
1338 
1339   PetscFunctionBegin;
1340   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1341   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1342   if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS);
1343   PetscCall(PetscLayoutSetUp(A->rmap));
1344   PetscCall(PetscLayoutSetUp(A->cmap));
1345   if (!d->A) {
1346     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1347     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1348   }
1349   PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
1350   PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1351   A->preallocated = PETSC_TRUE;
1352   A->assembled    = PETSC_TRUE;
1353   PetscFunctionReturn(PETSC_SUCCESS);
1354 }
1355 #endif
1356 
1357 #if defined(PETSC_HAVE_HIP)
1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha)
1359 {
1360   PetscScalar *da;
1361   PetscInt     lda;
1362 
1363   PetscFunctionBegin;
1364   PetscCall(MatDenseHIPGetArray(A, &da));
1365   PetscCall(MatDenseGetLDA(A, &lda));
1366   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1367   PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1368   PetscCall(MatDenseHIPRestoreArray(A, &da));
1369   PetscFunctionReturn(PETSC_SUCCESS);
1370 }
1371 
1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1373 {
1374   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1375   PetscInt      lda;
1376 
1377   PetscFunctionBegin;
1378   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1379   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1380   if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1381   a->vecinuse = col + 1;
1382   PetscCall(MatDenseGetLDA(a->A, &lda));
1383   PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1384   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1385   *v = a->cvec;
1386   PetscFunctionReturn(PETSC_SUCCESS);
1387 }
1388 
1389 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1390 {
1391   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1392 
1393   PetscFunctionBegin;
1394   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1395   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1396   a->vecinuse = 0;
1397   PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1398   PetscCall(VecHIPResetArray(a->cvec));
1399   if (v) *v = NULL;
1400   PetscFunctionReturn(PETSC_SUCCESS);
1401 }
1402 
1403 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1404 {
1405   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1406   PetscInt      lda;
1407 
1408   PetscFunctionBegin;
1409   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1410   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1411   if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1412   a->vecinuse = col + 1;
1413   PetscCall(MatDenseGetLDA(a->A, &lda));
1414   PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse));
1415   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1416   PetscCall(VecLockReadPush(a->cvec));
1417   *v = a->cvec;
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
1421 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1422 {
1423   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1424 
1425   PetscFunctionBegin;
1426   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecRead() first");
1427   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1428   a->vecinuse = 0;
1429   PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse));
1430   PetscCall(VecLockReadPop(a->cvec));
1431   PetscCall(VecHIPResetArray(a->cvec));
1432   if (v) *v = NULL;
1433   PetscFunctionReturn(PETSC_SUCCESS);
1434 }
1435 
1436 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1437 {
1438   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1439   PetscInt      lda;
1440 
1441   PetscFunctionBegin;
1442   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1443   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1444   if (!a->cvec) PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1445   a->vecinuse = col + 1;
1446   PetscCall(MatDenseGetLDA(a->A, &lda));
1447   PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1448   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1449   *v = a->cvec;
1450   PetscFunctionReturn(PETSC_SUCCESS);
1451 }
1452 
1453 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1454 {
1455   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1456 
1457   PetscFunctionBegin;
1458   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecWrite() first");
1459   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1460   a->vecinuse = 0;
1461   PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1462   PetscCall(VecHIPResetArray(a->cvec));
1463   if (v) *v = NULL;
1464   PetscFunctionReturn(PETSC_SUCCESS);
1465 }
1466 
1467 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1468 {
1469   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1470 
1471   PetscFunctionBegin;
1472   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1473   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1474   PetscCall(MatDenseHIPPlaceArray(l->A, a));
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
1478 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A)
1479 {
1480   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1481 
1482   PetscFunctionBegin;
1483   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1484   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1485   PetscCall(MatDenseHIPResetArray(l->A));
1486   PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488 
1489 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1490 {
1491   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1492 
1493   PetscFunctionBegin;
1494   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1495   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1496   PetscCall(MatDenseHIPReplaceArray(l->A, a));
1497   PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499 
1500 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1501 {
1502   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1503 
1504   PetscFunctionBegin;
1505   PetscCall(MatDenseHIPGetArrayWrite(l->A, a));
1506   PetscFunctionReturn(PETSC_SUCCESS);
1507 }
1508 
1509 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1510 {
1511   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1512 
1513   PetscFunctionBegin;
1514   PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a));
1515   PetscFunctionReturn(PETSC_SUCCESS);
1516 }
1517 
1518 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1519 {
1520   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1521 
1522   PetscFunctionBegin;
1523   PetscCall(MatDenseHIPGetArrayRead(l->A, a));
1524   PetscFunctionReturn(PETSC_SUCCESS);
1525 }
1526 
1527 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1528 {
1529   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1530 
1531   PetscFunctionBegin;
1532   PetscCall(MatDenseHIPRestoreArrayRead(l->A, a));
1533   PetscFunctionReturn(PETSC_SUCCESS);
1534 }
1535 
1536 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a)
1537 {
1538   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1539 
1540   PetscFunctionBegin;
1541   PetscCall(MatDenseHIPGetArray(l->A, a));
1542   PetscFunctionReturn(PETSC_SUCCESS);
1543 }
1544 
1545 static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a)
1546 {
1547   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1548 
1549   PetscFunctionBegin;
1550   PetscCall(MatDenseHIPRestoreArray(l->A, a));
1551   PetscFunctionReturn(PETSC_SUCCESS);
1552 }
1553 
1554 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1555 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1556 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1557 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1558 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1559 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1560 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1561 
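/* HIP analogue of MatBindToCPU_MPIDenseCUDA() above: swaps the composed column-vector accessors and the MatShift() implementation between host and HIP variants */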
1562 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind)
1563 {
1564   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1565 
1566   PetscFunctionBegin;
1567   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1568   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1569   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1570   mat->boundtocpu = bind;
1571   if (!bind) {
1572     PetscBool iship;
1573 
1574     PetscCall(PetscFree(mat->defaultrandtype));
1575     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1576     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iship));
1577     if (!iship) PetscCall(VecDestroy(&d->cvec));
1578     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iship));
1579     if (!iship) PetscCall(MatDestroy(&d->cmat));
1580     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP));
1581     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP));
1582     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP));
1583     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP));
1584     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP));
1585     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP));
1586     mat->ops->shift = MatShift_MPIDenseHIP;
1587   } else {
1588     PetscCall(PetscFree(mat->defaultrandtype));
1589     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1590     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1591     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1592     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1593     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1594     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1595     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1596     mat->ops->shift = MatShift_MPIDense;
1597   }
1598   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
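/* HIP analogue of MatMPIDenseCUDASetPreallocation(): preallocates the inner MATSEQDENSEHIP matrix, optionally with a caller-provided device array d_data */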
1602 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data)
1603 {
1604   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1605   PetscBool     iship;
1606 
1607   PetscFunctionBegin;
1608   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1609   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
1610   if (!iship) PetscFunctionReturn(PETSC_SUCCESS);
1611   PetscCall(PetscLayoutSetUp(A->rmap));
1612   PetscCall(PetscLayoutSetUp(A->cmap));
1613   if (!d->A) {
1614     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1615     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1616   }
1617   PetscCall(MatSetType(d->A, MATSEQDENSEHIP));
1618   PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data));
1619   A->preallocated = PETSC_TRUE;
1620   A->assembled    = PETSC_TRUE;
1621   PetscFunctionReturn(PETSC_SUCCESS);
1622 }
1623 #endif
1624 
1625 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1626 {
1627   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(MatSetRandom(d->A, rctx));
1631 #if defined(PETSC_HAVE_DEVICE)
1632   x->offloadmask = d->A->offloadmask;
1633 #endif
1634   PetscFunctionReturn(PETSC_SUCCESS);
1635 }
1636 
1637 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1638 {
1639   PetscFunctionBegin;
1640   *missing = PETSC_FALSE;
1641   PetscFunctionReturn(PETSC_SUCCESS);
1642 }
1643 
1644 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1645 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1646 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1647 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1648 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1649 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1650 
1651 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1652                                        MatGetRow_MPIDense,
1653                                        MatRestoreRow_MPIDense,
1654                                        MatMult_MPIDense,
1655                                        /*  4*/ MatMultAdd_MPIDense,
1656                                        MatMultTranspose_MPIDense,
1657                                        MatMultTransposeAdd_MPIDense,
1658                                        NULL,
1659                                        NULL,
1660                                        NULL,
1661                                        /* 10*/ NULL,
1662                                        NULL,
1663                                        NULL,
1664                                        NULL,
1665                                        MatTranspose_MPIDense,
1666                                        /* 15*/ MatGetInfo_MPIDense,
1667                                        MatEqual_MPIDense,
1668                                        MatGetDiagonal_MPIDense,
1669                                        MatDiagonalScale_MPIDense,
1670                                        MatNorm_MPIDense,
1671                                        /* 20*/ MatAssemblyBegin_MPIDense,
1672                                        MatAssemblyEnd_MPIDense,
1673                                        MatSetOption_MPIDense,
1674                                        MatZeroEntries_MPIDense,
1675                                        /* 24*/ MatZeroRows_MPIDense,
1676                                        NULL,
1677                                        NULL,
1678                                        NULL,
1679                                        NULL,
1680                                        /* 29*/ MatSetUp_MPIDense,
1681                                        NULL,
1682                                        NULL,
1683                                        MatGetDiagonalBlock_MPIDense,
1684                                        NULL,
1685                                        /* 34*/ MatDuplicate_MPIDense,
1686                                        NULL,
1687                                        NULL,
1688                                        NULL,
1689                                        NULL,
1690                                        /* 39*/ MatAXPY_MPIDense,
1691                                        MatCreateSubMatrices_MPIDense,
1692                                        NULL,
1693                                        MatGetValues_MPIDense,
1694                                        MatCopy_MPIDense,
1695                                        /* 44*/ NULL,
1696                                        MatScale_MPIDense,
1697                                        MatShift_MPIDense,
1698                                        NULL,
1699                                        NULL,
1700                                        /* 49*/ MatSetRandom_MPIDense,
1701                                        NULL,
1702                                        NULL,
1703                                        NULL,
1704                                        NULL,
1705                                        /* 54*/ NULL,
1706                                        NULL,
1707                                        NULL,
1708                                        NULL,
1709                                        NULL,
1710                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1711                                        MatDestroy_MPIDense,
1712                                        MatView_MPIDense,
1713                                        NULL,
1714                                        NULL,
1715                                        /* 64*/ NULL,
1716                                        NULL,
1717                                        NULL,
1718                                        NULL,
1719                                        NULL,
1720                                        /* 69*/ NULL,
1721                                        NULL,
1722                                        NULL,
1723                                        NULL,
1724                                        NULL,
1725                                        /* 74*/ NULL,
1726                                        NULL,
1727                                        NULL,
1728                                        NULL,
1729                                        NULL,
1730                                        /* 79*/ NULL,
1731                                        NULL,
1732                                        NULL,
1733                                        NULL,
1734                                        /* 83*/ MatLoad_MPIDense,
1735                                        NULL,
1736                                        NULL,
1737                                        NULL,
1738                                        NULL,
1739                                        NULL,
1740                                        /* 89*/ NULL,
1741                                        NULL,
1742                                        NULL,
1743                                        NULL,
1744                                        NULL,
1745                                        /* 94*/ NULL,
1746                                        NULL,
1747                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1748                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1749                                        NULL,
1750                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1751                                        NULL,
1752                                        NULL,
1753                                        MatConjugate_MPIDense,
1754                                        NULL,
1755                                        /*104*/ NULL,
1756                                        MatRealPart_MPIDense,
1757                                        MatImaginaryPart_MPIDense,
1758                                        NULL,
1759                                        NULL,
1760                                        /*109*/ NULL,
1761                                        NULL,
1762                                        NULL,
1763                                        MatGetColumnVector_MPIDense,
1764                                        MatMissingDiagonal_MPIDense,
1765                                        /*114*/ NULL,
1766                                        NULL,
1767                                        NULL,
1768                                        NULL,
1769                                        NULL,
1770                                        /*119*/ NULL,
1771                                        NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        NULL,
1775                                        /*124*/ NULL,
1776                                        MatGetColumnReductions_MPIDense,
1777                                        NULL,
1778                                        NULL,
1779                                        NULL,
1780                                        /*129*/ NULL,
1781                                        NULL,
1782                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1783                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1784                                        NULL,
1785                                        /*134*/ NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                        /*139*/ NULL,
1791                                        NULL,
1792                                        NULL,
1793                                        NULL,
1794                                        NULL,
1795                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1796                                        /*145*/ NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        NULL,
1801                                        /*150*/ NULL,
1802                                        NULL};
1803 
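/*
  Preallocate the inner sequential matrix a->A that stores the rmap->n x cmap->N local block of
  each rank, choosing MATSEQDENSE, MATSEQDENSECUDA, or MATSEQDENSEHIP to match the type of the
  parallel matrix. If data is non-NULL it is used as the storage for the local block.
*/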
1804 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1805 {
1806   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1807   MatType       mtype = MATSEQDENSE;
1808 
1809   PetscFunctionBegin;
1810   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1811   PetscCall(PetscLayoutSetUp(mat->rmap));
1812   PetscCall(PetscLayoutSetUp(mat->cmap));
1813   if (!a->A) {
1814     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1815     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1816   }
1817 #if defined(PETSC_HAVE_CUDA)
1818   PetscBool iscuda;
1819   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1820   if (iscuda) mtype = MATSEQDENSECUDA;
1821 #endif
1822 #if defined(PETSC_HAVE_HIP)
1823   PetscBool iship;
1824   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1825   if (iship) mtype = MATSEQDENSEHIP;
1826 #endif
1827   PetscCall(MatSetType(a->A, mtype));
1828   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1829 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1830   mat->offloadmask = a->A->offloadmask;
1831 #endif
1832   mat->preallocated = PETSC_TRUE;
1833   mat->assembled    = PETSC_TRUE;
1834   PetscFunctionReturn(PETSC_SUCCESS);
1835 }
1836 
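/*
  Conversion between MATMPIAIJ and MATMPIDENSE (and the reverse below): the local rows are first
  converted on each rank to the corresponding sequential type, and the per-rank pieces are then
  concatenated into the parallel result with MatCreateMPIMatConcatenateSeqMat().
*/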
1837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1838 {
1839   Mat B, C;
1840 
1841   PetscFunctionBegin;
1842   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1843   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1844   PetscCall(MatDestroy(&C));
1845   if (reuse == MAT_REUSE_MATRIX) {
1846     C = *newmat;
1847   } else C = NULL;
1848   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1849   PetscCall(MatDestroy(&B));
1850   if (reuse == MAT_INPLACE_MATRIX) {
1851     PetscCall(MatHeaderReplace(A, &C));
1852   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1853   PetscFunctionReturn(PETSC_SUCCESS);
1854 }
1855 
1856 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1857 {
1858   Mat B, C;
1859 
1860   PetscFunctionBegin;
1861   PetscCall(MatDenseGetLocalMatrix(A, &C));
1862   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1863   if (reuse == MAT_REUSE_MATRIX) {
1864     C = *newmat;
1865   } else C = NULL;
1866   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1867   PetscCall(MatDestroy(&B));
1868   if (reuse == MAT_INPLACE_MATRIX) {
1869     PetscCall(MatHeaderReplace(A, &C));
1870   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1871   PetscFunctionReturn(PETSC_SUCCESS);
1872 }
1873 
1874 #if defined(PETSC_HAVE_ELEMENTAL)
1875 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1876 {
1877   Mat          mat_elemental;
1878   PetscScalar *v;
1879   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1880 
1881   PetscFunctionBegin;
1882   if (reuse == MAT_REUSE_MATRIX) {
1883     mat_elemental = *newmat;
1884     PetscCall(MatZeroEntries(*newmat));
1885   } else {
1886     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1887     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1888     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1889     PetscCall(MatSetUp(mat_elemental));
1890     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1891   }
1892 
1893   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1894   for (i = 0; i < N; i++) cols[i] = i;
1895   for (i = 0; i < m; i++) rows[i] = rstart + i;
1896 
1897   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1898   PetscCall(MatDenseGetArray(A, &v));
1899   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1900   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1901   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1902   PetscCall(MatDenseRestoreArray(A, &v));
1903   PetscCall(PetscFree2(rows, cols));
1904 
1905   if (reuse == MAT_INPLACE_MATRIX) {
1906     PetscCall(MatHeaderReplace(A, &mat_elemental));
1907   } else {
1908     *newmat = mat_elemental;
1909   }
1910   PetscFunctionReturn(PETSC_SUCCESS);
1911 }
1912 #endif
1913 
1914 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1915 {
1916   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1917 
1918   PetscFunctionBegin;
1919   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1920   PetscFunctionReturn(PETSC_SUCCESS);
1921 }
1922 
1923 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1924 {
1925   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1926 
1927   PetscFunctionBegin;
1928   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1929   PetscFunctionReturn(PETSC_SUCCESS);
1930 }
1931 
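/*
  Assemble a parallel MATMPIDENSE matrix whose local rows on each rank are given by the sequential
  matrix inmat. With MAT_INITIAL_MATRIX the column ownership is determined (or split with
  PetscSplitOwnership()) and the parallel matrix is created; the numeric phase then copies inmat
  into the local block.
*/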
1932 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1933 {
1934   Mat_MPIDense *mat;
1935   PetscInt      m, nloc, N;
1936 
1937   PetscFunctionBegin;
1938   PetscCall(MatGetSize(inmat, &m, &N));
1939   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1940   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1941     PetscInt sum;
1942 
1943     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1944     /* Check sum(n) = N */
1945     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1946     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1947 
1948     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1949     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1950   }
1951 
1952   /* numeric phase */
1953   mat = (Mat_MPIDense *)(*outmat)->data;
1954   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1955   PetscFunctionReturn(PETSC_SUCCESS);
1956 }
1957 
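/*
  Conversions between MATMPIDENSE and MATMPIDENSECUDA (and, below, MATMPIDENSEHIP) duplicate or
  reuse the target matrix as requested, switch the type name, default vector type, and composed
  device-array accessors, convert the inner sequential matrix in place, and set the offload mask.
*/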
1958 #if defined(PETSC_HAVE_CUDA)
1959 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1960 {
1961   Mat           B;
1962   Mat_MPIDense *m;
1963 
1964   PetscFunctionBegin;
1965   if (reuse == MAT_INITIAL_MATRIX) {
1966     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1967   } else if (reuse == MAT_REUSE_MATRIX) {
1968     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1969   }
1970 
1971   B = *newmat;
1972   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
1973   PetscCall(PetscFree(B->defaultvectype));
1974   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
1975   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
1976   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
1977   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
1978   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
1979   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
1980   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
1981   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
1982   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
1983   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
1984   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
1985   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
1986   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
1987   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
1988   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
1989   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
1990   m = (Mat_MPIDense *)(B)->data;
1991   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
1992   B->ops->bindtocpu = NULL;
1993   B->offloadmask    = PETSC_OFFLOAD_CPU;
1994   PetscFunctionReturn(PETSC_SUCCESS);
1995 }
1996 
1997 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1998 {
1999   Mat           B;
2000   Mat_MPIDense *m;
2001 
2002   PetscFunctionBegin;
2003   if (reuse == MAT_INITIAL_MATRIX) {
2004     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2005   } else if (reuse == MAT_REUSE_MATRIX) {
2006     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2007   }
2008 
2009   B = *newmat;
2010   PetscCall(PetscFree(B->defaultvectype));
2011   PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
2012   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
2013   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
2014   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2015   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2016   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2017   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2018   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
2019   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
2020   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
2021   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
2022   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
2023   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
2024   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
2025   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
2026   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
2027   m = (Mat_MPIDense *)(B->data);
2028   if (m->A) {
2029     PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
2030     B->offloadmask = PETSC_OFFLOAD_BOTH;
2031   } else {
2032     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2033   }
2034   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));
2035 
2036   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
2037   PetscFunctionReturn(PETSC_SUCCESS);
2038 }
2039 #endif
2040 
2041 #if defined(PETSC_HAVE_HIP)
2042 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2043 {
2044   Mat           B;
2045   Mat_MPIDense *m;
2046 
2047   PetscFunctionBegin;
2048   if (reuse == MAT_INITIAL_MATRIX) {
2049     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2050   } else if (reuse == MAT_REUSE_MATRIX) {
2051     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2052   }
2053 
2054   B = *newmat;
2055   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE));
2056   PetscCall(PetscFree(B->defaultvectype));
2057   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
2058   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
2059   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL));
2060   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
2061   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
2062   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
2063   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
2064   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL));
2065   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL));
2066   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL));
2067   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL));
2068   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL));
2069   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL));
2070   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL));
2071   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL));
2072   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL));
2073   m = (Mat_MPIDense *)(B)->data;
2074   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2075   B->ops->bindtocpu = NULL;
2076   B->offloadmask    = PETSC_OFFLOAD_CPU;
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2081 {
2082   Mat           B;
2083   Mat_MPIDense *m;
2084 
2085   PetscFunctionBegin;
2086   if (reuse == MAT_INITIAL_MATRIX) {
2087     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2088   } else if (reuse == MAT_REUSE_MATRIX) {
2089     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2090   }
2091 
2092   B = *newmat;
2093   PetscCall(PetscFree(B->defaultvectype));
2094   PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype));
2095   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP));
2096   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense));
2097   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2098   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2099   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2100   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2101   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP));
2102   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP));
2103   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP));
2104   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP));
2105   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP));
2106   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP));
2107   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP));
2108   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP));
2109   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP));
2110   m = (Mat_MPIDense *)(B->data);
2111   if (m->A) {
2112     PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A));
2113     B->offloadmask = PETSC_OFFLOAD_BOTH;
2114   } else {
2115     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2116   }
2117   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE));
2118 
2119   B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP;
2120   PetscFunctionReturn(PETSC_SUCCESS);
2121 }
2122 #endif
2123 
2124 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2125 {
2126   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2127   PetscInt      lda;
2128 
2129   PetscFunctionBegin;
2130   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2131   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2132   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2133   a->vecinuse = col + 1;
2134   PetscCall(MatDenseGetLDA(a->A, &lda));
2135   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
2136   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2137   *v = a->cvec;
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2142 {
2143   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2144 
2145   PetscFunctionBegin;
2146   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2147   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
2148   a->vecinuse = 0;
2149   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
2150   PetscCall(VecResetArray(a->cvec));
2151   if (v) *v = NULL;
2152   PetscFunctionReturn(PETSC_SUCCESS);
2153 }
2154 
2155 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2156 {
2157   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2158   PetscInt      lda;
2159 
2160   PetscFunctionBegin;
2161   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2162   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2163   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2164   a->vecinuse = col + 1;
2165   PetscCall(MatDenseGetLDA(a->A, &lda));
2166   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
2167   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2168   PetscCall(VecLockReadPush(a->cvec));
2169   *v = a->cvec;
2170   PetscFunctionReturn(PETSC_SUCCESS);
2171 }
2172 
2173 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2174 {
2175   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2176 
2177   PetscFunctionBegin;
2178   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecRead() first");
2179   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2180   a->vecinuse = 0;
2181   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
2182   PetscCall(VecLockReadPop(a->cvec));
2183   PetscCall(VecResetArray(a->cvec));
2184   if (v) *v = NULL;
2185   PetscFunctionReturn(PETSC_SUCCESS);
2186 }
2187 
2188 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2189 {
2190   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2191   PetscInt      lda;
2192 
2193   PetscFunctionBegin;
2194   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2195   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2196   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2197   a->vecinuse = col + 1;
2198   PetscCall(MatDenseGetLDA(a->A, &lda));
2199   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2200   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2201   *v = a->cvec;
2202   PetscFunctionReturn(PETSC_SUCCESS);
2203 }
2204 
2205 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2206 {
2207   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2208 
2209   PetscFunctionBegin;
2210   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVecWrite() first");
2211   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2212   a->vecinuse = 0;
2213   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2214   PetscCall(VecResetArray(a->cvec));
2215   if (v) *v = NULL;
2216   PetscFunctionReturn(PETSC_SUCCESS);
2217 }
2218 
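/*
  Return a matrix view of the global rows [rbegin, rend) and columns [cbegin, cend). The wrapper
  a->cmat is created lazily and cached; its layouts are rebuilt only when the requested sizes
  change. Its local data is a view of the inner sequential matrix obtained with
  MatDenseGetSubMatrix(), which must later be returned with MatDenseRestoreSubMatrix().
*/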
2219 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
2220 {
2221   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2222   Mat_MPIDense *c;
2223   MPI_Comm      comm;
2224   PetscInt      pbegin, pend;
2225 
2226   PetscFunctionBegin;
2227   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2228   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2229   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2230   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
2231   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
2232   if (!a->cmat) {
2233     PetscCall(MatCreate(comm, &a->cmat));
2234     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
2235     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
2236     else {
2237       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2238       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2239       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2240     }
2241     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2242     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2243   } else {
2244     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
2245     if (same && a->cmat->rmap->N != A->rmap->N) {
2246       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
2247       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2248     }
2249     if (!same) {
2250       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
2251       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
2252       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2253       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2254       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2255     }
2256     if (cend - cbegin != a->cmat->cmap->N) {
2257       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
2258       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
2259       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2260       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2261     }
2262   }
2263   c = (Mat_MPIDense *)a->cmat->data;
2264   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2265   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
2266 
2267   a->cmat->preallocated = PETSC_TRUE;
2268   a->cmat->assembled    = PETSC_TRUE;
2269 #if defined(PETSC_HAVE_DEVICE)
2270   a->cmat->offloadmask = c->A->offloadmask;
2271 #endif
2272   a->matinuse = cbegin + 1;
2273   *v          = a->cmat;
2274   PetscFunctionReturn(PETSC_SUCCESS);
2275 }
2276 
2277 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
2278 {
2279   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2280   Mat_MPIDense *c;
2281 
2282   PetscFunctionBegin;
2283   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
2284   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
2285   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
2286   a->matinuse = 0;
2287   c           = (Mat_MPIDense *)a->cmat->data;
2288   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
2289   if (v) *v = NULL;
2290 #if defined(PETSC_HAVE_DEVICE)
2291   A->offloadmask = a->A->offloadmask;
2292 #endif
2293   PetscFunctionReturn(PETSC_SUCCESS);
2294 }
2295 
2296 /*MC
2297    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
2298 
2299    Options Database Key:
2300 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
2301 
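  Example Usage:
  A minimal sketch of creating a distributed dense matrix (the sizes are illustrative):
.vb
  Mat A;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 50));
  PetscCall(MatSetType(A, MATMPIDENSE));
  PetscCall(MatMPIDenseSetPreallocation(A, NULL));
.ve
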
2302   Level: beginner
2303 
2304 .seealso: [](chapter_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
2305 M*/
2306 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
2307 {
2308   Mat_MPIDense *a;
2309 
2310   PetscFunctionBegin;
2311   PetscCall(PetscNew(&a));
2312   mat->data = (void *)a;
2313   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
2314 
2315   mat->insertmode = NOT_SET_VALUES;
2316 
2317   /* build cache for off-processor entries generated during assembly */
2318   a->donotstash = PETSC_FALSE;
2319 
2320   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
2321 
2322   /* stuff used for matrix vector multiply */
2323   a->lvec        = NULL;
2324   a->Mvctx       = NULL;
2325   a->roworiented = PETSC_TRUE;
2326 
2327   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
2328   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
2329   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
2330   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
2331   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
2332   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
2333   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
2334   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
2335   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
2336   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
2337   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
2338   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
2339   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
2340   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
2341   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
2342   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
2343   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
2344   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
2345   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
2346   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
2347   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
2348 #if defined(PETSC_HAVE_ELEMENTAL)
2349   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
2350 #endif
2351 #if defined(PETSC_HAVE_SCALAPACK)
2352   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
2353 #endif
2354 #if defined(PETSC_HAVE_CUDA)
2355   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
2356 #endif
2357   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
2358   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2359   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2360 #if defined(PETSC_HAVE_CUDA)
2361   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2362   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2363 #endif
2364 #if defined(PETSC_HAVE_HIP)
2365   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
2366   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2367   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2368 #endif
2369   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
2370   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
2371   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
2372   PetscFunctionReturn(PETSC_SUCCESS);
2373 }
2374 
2375 /*MC
2376    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2377 
2378    Options Database Key:
2379 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
2380 
2381   Level: beginner
2382 
2383 .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2384 M*/
2385 #if defined(PETSC_HAVE_CUDA)
2386   #include <petsc/private/deviceimpl.h>
2387 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2388 {
2389   PetscFunctionBegin;
2390   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
2391   PetscCall(MatCreate_MPIDense(B));
2392   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
2393   PetscFunctionReturn(PETSC_SUCCESS);
2394 }
2395 #endif
2396 
2397 /*MC
2398    MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs.
2399 
2400    Options Database Key:
2401 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()`
2402 
2403   Level: beginner
2404 
2405 .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSEHIP`, `MATSEQDENSECUDA`
2406 M*/
2407 #if defined(PETSC_HAVE_HIP)
2408   #include <petsc/private/deviceimpl.h>
2409 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B)
2410 {
2411   PetscFunctionBegin;
2412   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
2413   PetscCall(MatCreate_MPIDense(B));
2414   PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B));
2415   PetscFunctionReturn(PETSC_SUCCESS);
2416 }
2417 #endif
2418 
2419 /*MC
2420    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2421 
2422    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2423    and `MATMPIDENSE` otherwise.
2424 
2425    Options Database Key:
2426 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
2427 
2428   Level: beginner
2429 
2430 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
2431 M*/
2432 
2433 /*MC
2434    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs; the HIP
2435    equivalent is `MATDENSEHIP` = "densehip".
2437 
2438    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2439    and `MATMPIDENSECUDA` otherwise.
2440 
2441    Options Database Key:
2442 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
2443 
2444   Level: beginner
2445 
2446 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2447 M*/
2448 
2449 /*MC
2450    MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs.
2451 
2452    This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator,
2453    and `MATMPIDENSEHIP` otherwise.
2454 
2455    Options Database Key:
2456 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()`
2457 
2458   Level: beginner
2459 
2460 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2461 M*/
2462 
2463 /*@C
2464    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2465 
2466    Collective
2467 
2468    Input Parameters:
2469 +  B - the matrix
2470 -  data - optional location of matrix data.  Set to `NULL` for PETSc
2471    to control all matrix memory allocation.
2472 
2473    Level: intermediate
2474 
2475    Notes:
2476    The dense format is fully compatible with standard Fortran
2477    storage by columns.
2478 
2479    The data input variable is intended primarily for Fortran programmers
2480    who wish to allocate their own matrix memory space.  Most users should
2481    set `data` to `NULL`.
2482 
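   Example Usage:
   A minimal sketch (the local sizes `m` and `n` are placeholders chosen by the caller); passing `NULL` lets PETSc allocate the storage:
.vb
   Mat B;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPIDENSE));
   PetscCall(MatMPIDenseSetPreallocation(B, NULL)); /* PETSc allocates the local block of the matrix */
.ve
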
2483 .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2484 @*/
2485 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
2486 {
2487   PetscFunctionBegin;
2488   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2489   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
2490   PetscFunctionReturn(PETSC_SUCCESS);
2491 }
2492 
2493 /*@
2494    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2495    array provided by the user. This is useful to avoid copying an array
2496    into a matrix
2497 
2498    Not Collective
2499 
2500    Input Parameters:
2501 +  mat - the matrix
2502 -  array - the array in column major order
2503 
2504    Level: developer
2505 
2506    Note:
2507    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2508    freed when the matrix is destroyed.
2509 
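   Example Usage:
   A minimal sketch, assuming `myarray` (a placeholder name) holds the local values in column-major order with leading dimension equal to the number of local rows `m`:
.vb
   PetscScalar *myarray;
   PetscCall(PetscMalloc1(m * N, &myarray)); /* m local rows, N global columns (placeholders) */
   /* ... fill myarray ... */
   PetscCall(MatDensePlaceArray(mat, myarray));
   /* ... use mat ... */
   PetscCall(MatDenseResetArray(mat));
   PetscCall(PetscFree(myarray));
.ve
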
2510 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2511           `MatDenseReplaceArray()`
2512 @*/
2513 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
2514 {
2515   PetscFunctionBegin;
2516   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2517   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2518   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2519 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2520   mat->offloadmask = PETSC_OFFLOAD_CPU;
2521 #endif
2522   PetscFunctionReturn(PETSC_SUCCESS);
2523 }
2524 
2525 /*@
2526    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
2527 
2528    Not Collective
2529 
2530    Input Parameter:
2531 .  mat - the matrix
2532 
2533    Level: developer
2534 
2535    Note:
2536    You can only call this after a call to `MatDensePlaceArray()`
2537 
2538 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2539 @*/
2540 PetscErrorCode MatDenseResetArray(Mat mat)
2541 {
2542   PetscFunctionBegin;
2543   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2544   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2545   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2546   PetscFunctionReturn(PETSC_SUCCESS);
2547 }
2548 
2549 /*@
2550    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2551    array provided by the user. This is useful to avoid copying an array
2552    into a matrix
2553 
2554    Not Collective
2555 
2556    Input Parameters:
2557 +  mat - the matrix
2558 -  array - the array in column major order
2559 
2560    Level: developer
2561 
2562    Note:
2563    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2564    freed by the user. It will be freed when the matrix is destroyed.
2565 
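   Example Usage:
   A minimal sketch; `newarray` is a placeholder name and must be obtained with `PetscMalloc1()` since the matrix takes ownership of it:
.vb
   PetscScalar *newarray;
   PetscCall(PetscMalloc1(m * N, &newarray)); /* m local rows, N global columns (placeholders) */
   /* ... fill newarray ... */
   PetscCall(MatDenseReplaceArray(mat, newarray));
   /* do NOT free newarray; it is freed when mat is destroyed */
.ve
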
2566 .seealso: [](chapter_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2567 @*/
2568 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2569 {
2570   PetscFunctionBegin;
2571   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2572   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2573   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2574 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2575   mat->offloadmask = PETSC_OFFLOAD_CPU;
2576 #endif
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 #if defined(PETSC_HAVE_CUDA)
2581 /*@C
2582    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2583    array provided by the user. This is useful to avoid copying an array
2584    into a matrix
2585 
2586    Not Collective
2587 
2588    Input Parameters:
2589 +  mat - the matrix
2590 -  array - the array in column major order
2591 
2592    Level: developer
2593 
2594    Note:
2595    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2596    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2597 
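   Example Usage:
   A minimal sketch, assuming `d_array` (a placeholder name) is a device buffer large enough for the local rows `m` times the global columns `N`:
.vb
   PetscScalar *d_array;
   PetscCallCUDA(cudaMalloc((void **)&d_array, m * N * sizeof(PetscScalar)));
   /* ... fill d_array on the device ... */
   PetscCall(MatDenseCUDAPlaceArray(mat, d_array));
   /* ... use mat ... */
   PetscCall(MatDenseCUDAResetArray(mat));
   PetscCallCUDA(cudaFree(d_array));
.ve
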
2598 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2599 @*/
2600 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2601 {
2602   PetscFunctionBegin;
2603   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2604   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2605   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2606   mat->offloadmask = PETSC_OFFLOAD_GPU;
2607   PetscFunctionReturn(PETSC_SUCCESS);
2608 }
2609 
2610 /*@C
2611    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`
2612 
2613    Not Collective
2614 
2615    Input Parameter:
2616 .  mat - the matrix
2617 
2618    Level: developer
2619 
2620    Note:
2621    You can only call this after a call to `MatDenseCUDAPlaceArray()`
2622 
2623 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2624 @*/
2625 PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2626 {
2627   PetscFunctionBegin;
2628   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2629   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2630   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2631   PetscFunctionReturn(PETSC_SUCCESS);
2632 }
2633 
2634 /*@C
2635    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2636    array provided by the user. This is useful to avoid copying an array
2637    into a matrix
2638 
2639    Not Collective
2640 
2641    Input Parameters:
2642 +  mat - the matrix
2643 -  array - the array in column major order
2644 
2645    Level: developer
2646 
2647    Note:
2648    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2649    The memory passed in CANNOT be freed by the user. It will be freed
2650    when the matrix is destroyed. The array should respect the matrix leading dimension.
2651 
2652 .seealso: [](chapter_matrices), `Mat`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2653 @*/
2654 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2655 {
2656   PetscFunctionBegin;
2657   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2658   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2659   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2660   mat->offloadmask = PETSC_OFFLOAD_GPU;
2661   PetscFunctionReturn(PETSC_SUCCESS);
2662 }
2663 
2664 /*@C
2665    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2666 
2667    Not Collective
2668 
2669    Input Parameter:
2670 .  A - the matrix
2671 
2672    Output Parameter:
2673 .  array - the GPU array in column major order
2674 
2675    Level: developer
2676 
2677    Notes:
2678    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
2679 
2680    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2681 
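   Example Usage:
   A minimal sketch that zeroes the local device buffer; for a parallel matrix the buffer holds the local rows of all `N` global columns with leading dimension `lda`:
.vb
   PetscScalar *d_a;
   PetscInt     lda, N;
   PetscCall(MatGetSize(A, NULL, &N));
   PetscCall(MatDenseGetLDA(A, &lda));
   PetscCall(MatDenseCUDAGetArrayWrite(A, &d_a));
   PetscCallCUDA(cudaMemset(d_a, 0, lda * N * sizeof(PetscScalar)));
   PetscCall(MatDenseCUDARestoreArrayWrite(A, &d_a));
.ve
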
2682 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2683 @*/
2684 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2685 {
2686   PetscFunctionBegin;
2687   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2688   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2689   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2690   PetscFunctionReturn(PETSC_SUCCESS);
2691 }
2692 
2693 /*@C
2694    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2695 
2696    Not Collective
2697 
2698    Input Parameters:
2699 +  A - the matrix
2700 -  array - the GPU array in column major order
2701 
2702    Level: developer
2703 
2704 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2705 @*/
2706 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2707 {
2708   PetscFunctionBegin;
2709   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2710   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2711   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2712   A->offloadmask = PETSC_OFFLOAD_GPU;
2713   PetscFunctionReturn(PETSC_SUCCESS);
2714 }
2715 
2716 /*@C
2717    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2718 
2719    Not Collective
2720 
2721    Input Parameter:
2722 .  A - the matrix
2723 
2724    Output Parameter:
2725 .  array - the GPU array in column major order
2726 
2727    Level: developer
2728 
2729    Note:
2730    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2731 
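   Example Usage:
   A minimal sketch copying the local device values to a temporary host buffer (`host` is a placeholder name):
.vb
   const PetscScalar *d_a;
   PetscScalar       *host;
   PetscInt           lda, N;
   PetscCall(MatGetSize(A, NULL, &N));
   PetscCall(MatDenseGetLDA(A, &lda));
   PetscCall(PetscMalloc1(lda * N, &host));
   PetscCall(MatDenseCUDAGetArrayRead(A, &d_a));
   PetscCallCUDA(cudaMemcpy(host, d_a, lda * N * sizeof(PetscScalar), cudaMemcpyDeviceToHost));
   PetscCall(MatDenseCUDARestoreArrayRead(A, &d_a));
   PetscCall(PetscFree(host));
.ve
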
2732 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2733 @*/
2734 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2735 {
2736   PetscFunctionBegin;
2737   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2738   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2739   PetscFunctionReturn(PETSC_SUCCESS);
2740 }
2741 
2742 /*@C
2743    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2744 
2745    Not Collective
2746 
2747    Input Parameters:
2748 +  A - the matrix
2749 -  array - the GPU array in column major order
2750 
2751    Level: developer
2752 
2753    Note:
2754    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2755 
2756 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2757 @*/
2758 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2759 {
2760   PetscFunctionBegin;
2761   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2762   PetscFunctionReturn(PETSC_SUCCESS);
2763 }
2764 
2765 /*@C
2766    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2767 
2768    Not Collective
2769 
2770    Input Parameter:
2771 .  A - the matrix
2772 
2773    Output Parameter:
2774 .  array - the GPU array in column major order
2775 
2776    Level: developer
2777 
2778    Note:
2779    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2780 
2781 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2782 @*/
2783 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2784 {
2785   PetscFunctionBegin;
2786   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2787   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2788   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2789   PetscFunctionReturn(PETSC_SUCCESS);
2790 }
2791 
2792 /*@C
2793    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2794 
2795    Not Collective
2796 
2797    Input Parameters:
2798 +  A - the matrix
2799 -  array - the GPU array in column major order
2800 
2801    Level: developer
2802 
2803 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2804 @*/
2805 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2806 {
2807   PetscFunctionBegin;
2808   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2809   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2810   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2811   A->offloadmask = PETSC_OFFLOAD_GPU;
2812   PetscFunctionReturn(PETSC_SUCCESS);
2813 }
2814 #endif
2815 
2816 #if defined(PETSC_HAVE_HIP)
2817 /*@C
2818    MatDenseHIPPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2819    array provided by the user. This is useful to avoid copying an array
2820    into a matrix
2821 
2822    Not Collective
2823 
2824    Input Parameters:
2825 +  mat - the matrix
2826 -  array - the array in column major order
2827 
2828    Level: developer
2829 
2830    Note:
2831    You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be
2832    freed when the matrix is destroyed. The array must have been allocated with `hipMalloc()`.
2833 
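   Example Usage:
   A minimal sketch, assuming `d_array` (a placeholder name) is a device buffer large enough for the local rows `m` times the global columns `N`:
.vb
   PetscScalar *d_array;
   PetscCallHIP(hipMalloc((void **)&d_array, m * N * sizeof(PetscScalar)));
   /* ... fill d_array on the device ... */
   PetscCall(MatDenseHIPPlaceArray(mat, d_array));
   /* ... use mat ... */
   PetscCall(MatDenseHIPResetArray(mat));
   PetscCallHIP(hipFree(d_array));
.ve
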
2834 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPResetArray()`
2835 @*/
2836 PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
2837 {
2838   PetscFunctionBegin;
2839   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2840   PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2841   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2842   mat->offloadmask = PETSC_OFFLOAD_GPU;
2843   PetscFunctionReturn(PETSC_SUCCESS);
2844 }
2845 
2846 /*@C
2847    MatDenseHIPResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseHIPPlaceArray()`
2848 
2849    Not Collective
2850 
2851    Input Parameter:
2852 .  mat - the matrix
2853 
2854    Level: developer
2855 
2856    Notes:
2857    You can only call this after a call to `MatDenseHIPPlaceArray()`
2858 
2859 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`
2860 @*/
2861 PetscErrorCode MatDenseHIPResetArray(Mat mat)
2862 {
2863   PetscFunctionBegin;
2864   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2865   PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
2866   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2867   PetscFunctionReturn(PETSC_SUCCESS);
2868 }
2869 
2870 /*@C
2871    MatDenseHIPReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2872    array provided by the user. This is useful to avoid copying an array
2873    into a matrix
2874 
2875    Not Collective
2876 
2877    Input Parameters:
2878 +  mat - the matrix
2879 -  array - the array in column major order
2880 
2881    Level: developer
2882 
2883    Note:
2884    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2885    The memory passed in CANNOT be freed by the user. It will be freed
2886    when the matrix is destroyed. The array should respect the matrix leading dimension.
2887 
2888 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`, `MatDenseHIPResetArray()`
2889 @*/
2890 PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
2891 {
2892   PetscFunctionBegin;
2893   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2894   PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2895   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2896   mat->offloadmask = PETSC_OFFLOAD_GPU;
2897   PetscFunctionReturn(PETSC_SUCCESS);
2898 }
2899 
2900 /*@C
2901    MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a dense matrix.
2902 
2903    Not Collective
2904 
2905    Input Parameter:
2906 .  A - the matrix
2907 
2908    Output Parameter:
2909 .  array - the GPU array in column major order
2910 
2911    Level: developer
2912 
2913    Note:
2914    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`.
2915    The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed.
2916 
2917 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArrayRead()`
2918 @*/
2919 PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
2920 {
2921   PetscFunctionBegin;
2922   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2923   PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2924   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2925   PetscFunctionReturn(PETSC_SUCCESS);
2926 }
2927 
2928 /*@C
2929    MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a dense matrix previously obtained with `MatDenseHIPGetArrayWrite()`.
2930 
2931    Not Collective
2932 
2933    Input Parameters:
2934 +  A - the matrix
2935 -  array - the GPU array in column major order
2936 
2937    Level: developer
2938 
2939 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
2940 @*/
2941 PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
2942 {
2943   PetscFunctionBegin;
2944   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2945   PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2946   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2947   A->offloadmask = PETSC_OFFLOAD_GPU;
2948   PetscFunctionReturn(PETSC_SUCCESS);
2949 }
2950 
2951 /*@C
2952    MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with `MatDenseHIPRestoreArrayRead()` when no longer needed.
2953 
2954    Not Collective
2955 
2956    Input Parameter:
2957 .  A - the matrix
2958 
2959    Output Parameter:
2960 .  array - the GPU array in column major order
2961 
2962    Level: developer
2963 
2964    Note:
2965    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`.
2966 
2967 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
2968 @*/
2969 PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
2970 {
2971   PetscFunctionBegin;
2972   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2973   PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2974   PetscFunctionReturn(PETSC_SUCCESS);
2975 }
2976 
2977 /*@C
2978    MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a dense matrix previously obtained with a call to `MatDenseHIPGetArrayRead()`.
2979 
2980    Not Collective
2981 
2982    Input Parameters:
2983 +  A - the matrix
2984 -  array - the GPU array in column major order
2985 
2986    Level: developer
2987 
2988    Notes:
2989    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`.
2990 
2991 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPGetArrayRead()`
2992 @*/
2993 PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
2994 {
2995   PetscFunctionBegin;
2996   PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2997   PetscFunctionReturn(PETSC_SUCCESS);
2998 }
2999 
3000 /*@C
3001    MatDenseHIPGetArray - Provides access to the HIP buffer inside a dense matrix. The array must be restored with `MatDenseHIPRestoreArray()` when no longer needed.
3002 
3003    Not Collective
3004 
3005    Input Parameter:
3006 .  A - the matrix
3007 
3008    Output Parameter:
3009 .  array - the GPU array in column major order
3010 
3011    Level: developer
3012 
3013    Note:
3014    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`.
3015 
3016    For read-only access, use `MatDenseHIPGetArrayRead()`.
3017 
3018 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
3019 @*/
3020 PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
3021 {
3022   PetscFunctionBegin;
3023   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3024   PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
3025   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3026   PetscFunctionReturn(PETSC_SUCCESS);
3027 }
3028 
3029 /*@C
3030    MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a dense matrix previously obtained with `MatDenseHIPGetArray()`.
3031 
3032    Not Collective
3033 
3034    Input Parameters:
3035 +  A - the matrix
3036 -  array - the GPU array in column major order
3037 
3038    Level: developer
3039 
3040 .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
3041 @*/
3042 PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
3043 {
3044   PetscFunctionBegin;
3045   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3046   PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
3047   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3048   A->offloadmask = PETSC_OFFLOAD_GPU;
3049   PetscFunctionReturn(PETSC_SUCCESS);
3050 }
3051 #endif
3052 
3053 /*@C
3054    MatCreateDense - Creates a matrix in `MATDENSE` format.
3055 
3056    Collective
3057 
3058    Input Parameters:
3059 +  comm - MPI communicator
3060 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3061 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3062 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3063 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3064 -  data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
3065    to control all matrix memory allocation.
3066 
3067    Output Parameter:
3068 .  A - the matrix
3069 
3070    Level: intermediate
3071 
3072    Notes:
3073    The dense format is fully compatible with standard Fortran
3074    storage by columns.
3075 
3076    Although local portions of the matrix are stored in column-major
3077    order, the matrix is partitioned across MPI ranks by row.
3078 
3079    The data input variable is intended primarily for Fortran programmers
3080    who wish to allocate their own matrix memory space.  Most users should
3081    set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
3082 
3083    The user MUST specify either the local or global matrix dimensions
3084    (possibly both).
3085 
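   Example Usage:
   A minimal sketch (the global sizes `M` and `N` are placeholders); PETSc chooses the row distribution and allocates the storage:
.vb
   Mat A;
   PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
   /* ... set entries with MatSetValues() ... */
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve
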
3086 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
3087 @*/
3088 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3089 {
3090   PetscFunctionBegin;
3091   PetscCall(MatCreate(comm, A));
3092   PetscCall(MatSetSizes(*A, m, n, M, N));
3093   PetscCall(MatSetType(*A, MATDENSE));
3094   PetscCall(MatSeqDenseSetPreallocation(*A, data));
3095   PetscCall(MatMPIDenseSetPreallocation(*A, data));
3096   PetscFunctionReturn(PETSC_SUCCESS);
3097 }
3098 
3099 #if defined(PETSC_HAVE_CUDA)
3100 /*@C
3101    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
3102 
3103    Collective
3104 
3105    Input Parameters:
3106 +  comm - MPI communicator
3107 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3108 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3109 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3110 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3111 -  data - optional location of GPU matrix data.  Use `NULL` for PETSc
3112    to control matrix memory allocation.
3113 
3114    Output Parameter:
3115 .  A - the matrix
3116 
3117    Level: intermediate
3118 
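   Example Usage:
   A minimal sketch (global sizes `M` and `N` are placeholders); passing `NULL` for `data` lets PETSc allocate the device storage:
.vb
   Mat A;
   PetscCall(MatCreateDenseCUDA(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
.ve
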
3119 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
3120 @*/
3121 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3122 {
3123   PetscFunctionBegin;
3124   PetscCall(MatCreate(comm, A));
3125   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3126   PetscCall(MatSetSizes(*A, m, n, M, N));
3127   PetscCall(MatSetType(*A, MATDENSECUDA));
3128   PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
3129   PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
3130   PetscFunctionReturn(PETSC_SUCCESS);
3131 }
3132 #endif
3133 
3134 #if defined(PETSC_HAVE_HIP)
3135 /*@C
3136    MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.
3137 
3138    Collective
3139 
3140    Input Parameters:
3141 +  comm - MPI communicator
3142 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3143 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3144 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3145 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3146 -  data - optional location of GPU matrix data.  Use `NULL` for PETSc
3147    to control matrix memory allocation.
3148 
3149    Output Parameter:
3150 .  A - the matrix
3151 
3152    Level: intermediate
3153 
3154 .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
3155 @*/
3156 PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3157 {
3158   PetscFunctionBegin;
3159   PetscCall(MatCreate(comm, A));
3160   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3161   PetscCall(MatSetSizes(*A, m, n, M, N));
3162   PetscCall(MatSetType(*A, MATDENSEHIP));
3163   PetscCall(MatSeqDenseHIPSetPreallocation(*A, data));
3164   PetscCall(MatMPIDenseHIPSetPreallocation(*A, data));
3165   PetscFunctionReturn(PETSC_SUCCESS);
3166 }
3167 #endif
3168 
3169 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
3170 {
3171   Mat           mat;
3172   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
3173 
3174   PetscFunctionBegin;
3175   *newmat = NULL;
3176   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
3177   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3178   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
3179   a = (Mat_MPIDense *)mat->data;
3180 
3181   mat->factortype   = A->factortype;
3182   mat->assembled    = PETSC_TRUE;
3183   mat->preallocated = PETSC_TRUE;
3184 
3185   mat->insertmode = NOT_SET_VALUES;
3186   a->donotstash   = oldmat->donotstash;
3187 
3188   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
3189   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
3190 
3191   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3192 
3193   *newmat = mat;
3194   PetscFunctionReturn(PETSC_SUCCESS);
3195 }
3196 
3197 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
3198 {
3199   PetscBool isbinary;
3200 #if defined(PETSC_HAVE_HDF5)
3201   PetscBool ishdf5;
3202 #endif
3203 
3204   PetscFunctionBegin;
3205   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3206   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3207   /* force binary viewer to load .info file if it has not yet done so */
3208   PetscCall(PetscViewerSetUp(viewer));
3209   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3210 #if defined(PETSC_HAVE_HDF5)
3211   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3212 #endif
3213   if (isbinary) {
3214     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
3215 #if defined(PETSC_HAVE_HDF5)
3216   } else if (ishdf5) {
3217     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
3218 #endif
3219   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3220   PetscFunctionReturn(PETSC_SUCCESS);
3221 }
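/*
  Sketch of how the loader above is typically reached from user code; the file name "matrix.dat"
  is a placeholder. MatLoad() dispatches to MatLoad_MPIDense() for a parallel MATDENSE matrix:

    Mat         A;
    PetscViewer viewer;
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATDENSE));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
*/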
3222 
3223 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
3224 {
3225   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
3226   Mat           a, b;
3227 
3228   PetscFunctionBegin;
3229   a = matA->A;
3230   b = matB->A;
3231   PetscCall(MatEqual(a, b, flag));
3232   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
3233   PetscFunctionReturn(PETSC_SUCCESS);
3234 }
3235 
3236 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
3237 {
3238   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
3239 
3240   PetscFunctionBegin;
3241   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
3242   PetscCall(MatDestroy(&atb->atb));
3243   PetscCall(PetscFree(atb));
3244   PetscFunctionReturn(PETSC_SUCCESS);
3245 }
3246 
3247 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
3248 {
3249   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
3250 
3251   PetscFunctionBegin;
3252   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
3253   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
3254   PetscCall(PetscFree(abt));
3255   PetscFunctionReturn(PETSC_SUCCESS);
3256 }
3257 
3258 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3259 {
3260   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3261   Mat_TransMatMultDense *atb;
3262   MPI_Comm               comm;
3263   PetscMPIInt            size, *recvcounts;
3264   PetscScalar           *carray, *sendbuf;
3265   const PetscScalar     *atbarray;
3266   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
3267   const PetscInt        *ranges;
3268 
3269   PetscFunctionBegin;
3270   MatCheckProduct(C, 3);
3271   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3272   atb        = (Mat_TransMatMultDense *)C->product->data;
3273   recvcounts = atb->recvcounts;
3274   sendbuf    = atb->sendbuf;
3275 
3276   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3277   PetscCallMPI(MPI_Comm_size(comm, &size));
3278 
3279   /* compute atbarray = aseq^T * bseq */
3280   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
3281 
3282   PetscCall(MatGetOwnershipRanges(C, &ranges));
3283 
3284   /* arrange atbarray into sendbuf */
3285   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
3286   PetscCall(MatDenseGetLDA(atb->atb, &lda));
3287   for (proc = 0, k = 0; proc < size; proc++) {
3288     for (j = 0; j < cN; j++) {
3289       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
3290     }
3291   }
3292   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
3293 
3294   /* sum all atbarray to local values of C */
3295   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
3296   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
3297   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
3298   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3299   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3300   PetscFunctionReturn(PETSC_SUCCESS);
3301 }
3302 
3303 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3304 {
3305   MPI_Comm               comm;
3306   PetscMPIInt            size;
3307   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
3308   Mat_TransMatMultDense *atb;
3309   PetscBool              cisdense = PETSC_FALSE;
3310   PetscInt               i;
3311   const PetscInt        *ranges;
3312 
3313   PetscFunctionBegin;
3314   MatCheckProduct(C, 4);
3315   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3316   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3317   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
3318     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3319   }
3320 
3321   /* create matrix product C */
3322   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
3323 #if defined(PETSC_HAVE_CUDA)
3324   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
3325 #endif
3326 #if defined(PETSC_HAVE_HIP)
3327   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
3328 #endif
3329   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
3330   PetscCall(MatSetUp(C));
3331 
3332   /* create data structure for reuse C */
3333   PetscCallMPI(MPI_Comm_size(comm, &size));
3334   PetscCall(PetscNew(&atb));
3335   cM = C->rmap->N;
3336   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
3337   PetscCall(MatGetOwnershipRanges(C, &ranges));
3338   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
3339 
3340   C->product->data    = atb;
3341   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
3342   PetscFunctionReturn(PETSC_SUCCESS);
3343 }
3344 
3345 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3346 {
3347   MPI_Comm               comm;
3348   PetscMPIInt            i, size;
3349   PetscInt               maxRows, bufsiz;
3350   PetscMPIInt            tag;
3351   PetscInt               alg;
3352   Mat_MatTransMultDense *abt;
3353   Mat_Product           *product = C->product;
3354   PetscBool              flg;
3355 
3356   PetscFunctionBegin;
3357   MatCheckProduct(C, 4);
3358   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3359   /* check local size of A and B */
3360   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
3361 
3362   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
3363   alg = flg ? 0 : 1;
3364 
3365   /* setup matrix product C */
3366   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
3367   PetscCall(MatSetType(C, MATMPIDENSE));
3368   PetscCall(MatSetUp(C));
3369   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
3370 
3371   /* create data structure for reuse C */
3372   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3373   PetscCallMPI(MPI_Comm_size(comm, &size));
3374   PetscCall(PetscNew(&abt));
3375   abt->tag = tag;
3376   abt->alg = alg;
3377   switch (alg) {
3378   case 1: /* alg: "cyclic" */
3379     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
3380     bufsiz = A->cmap->N * maxRows;
3381     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
3382     break;
3383   default: /* alg: "allgatherv" */
3384     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
3385     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
3386     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
3387     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
3388     break;
3389   }
3390 
3391   C->product->data                = abt;
3392   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
3393   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
3394   PetscFunctionReturn(PETSC_SUCCESS);
3395 }
3396 
3397 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
3398 {
3399   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3400   Mat_MatTransMultDense *abt;
3401   MPI_Comm               comm;
3402   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
3403   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
3404   PetscInt               i, cK             = A->cmap->N, k, j, bn;
3405   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3406   const PetscScalar     *av, *bv;
3407   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
3408   MPI_Request            reqs[2];
3409   const PetscInt        *ranges;
3410 
3411   PetscFunctionBegin;
3412   MatCheckProduct(C, 3);
3413   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3414   abt = (Mat_MatTransMultDense *)C->product->data;
3415   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3416   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3417   PetscCallMPI(MPI_Comm_size(comm, &size));
3418   PetscCall(MatDenseGetArrayRead(a->A, &av));
3419   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3420   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3421   PetscCall(MatDenseGetLDA(a->A, &i));
3422   PetscCall(PetscBLASIntCast(i, &alda));
3423   PetscCall(MatDenseGetLDA(b->A, &i));
3424   PetscCall(PetscBLASIntCast(i, &blda));
3425   PetscCall(MatDenseGetLDA(c->A, &i));
3426   PetscCall(PetscBLASIntCast(i, &clda));
3427   PetscCall(MatGetOwnershipRanges(B, &ranges));
3428   bn = B->rmap->n;
3429   if (blda == bn) {
3430     sendbuf = (PetscScalar *)bv;
3431   } else {
3432     sendbuf = abt->buf[0];
3433     for (k = 0, i = 0; i < cK; i++) {
3434       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
3435     }
3436   }
3437   if (size > 1) {
3438     sendto   = (rank + size - 1) % size;
3439     recvfrom = (rank + size + 1) % size;
3440   } else {
3441     sendto = recvfrom = 0;
3442   }
3443   PetscCall(PetscBLASIntCast(cK, &ck));
3444   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3445   recvisfrom = rank;
3446   for (i = 0; i < size; i++) {
3447     /* we have finished receiving and sending, bufs can be read/modified */
3448     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
3449     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
3450 
3451     if (nextrecvisfrom != rank) {
3452       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
3453       sendsiz = cK * bn;
3454       recvsiz = cK * nextbn;
3455       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
3456       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
3457       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
3458     }
3459 
3460     /* local aseq * sendbuf^T */
3461     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
3462     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
3463 
3464     if (nextrecvisfrom != rank) {
3465       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
3466       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
3467     }
3468     bn         = nextbn;
3469     recvisfrom = nextrecvisfrom;
3470     sendbuf    = recvbuf;
3471   }
3472   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3473   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3474   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3475   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3476   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3477   PetscFunctionReturn(PETSC_SUCCESS);
3478 }
3479 
3480 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
3481 {
3482   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3483   Mat_MatTransMultDense *abt;
3484   MPI_Comm               comm;
3485   PetscMPIInt            size;
3486   PetscScalar           *cv, *sendbuf, *recvbuf;
3487   const PetscScalar     *av, *bv;
3488   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
3489   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3490   PetscBLASInt           cm, cn, ck, alda, clda;
3491 
3492   PetscFunctionBegin;
3493   MatCheckProduct(C, 3);
3494   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3495   abt = (Mat_MatTransMultDense *)C->product->data;
3496   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3497   PetscCallMPI(MPI_Comm_size(comm, &size));
3498   PetscCall(MatDenseGetArrayRead(a->A, &av));
3499   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3500   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3501   PetscCall(MatDenseGetLDA(a->A, &i));
3502   PetscCall(PetscBLASIntCast(i, &alda));
3503   PetscCall(MatDenseGetLDA(b->A, &blda));
3504   PetscCall(MatDenseGetLDA(c->A, &i));
3505   PetscCall(PetscBLASIntCast(i, &clda));
3506   /* copy transpose of B into buf[0] */
3507   bn      = B->rmap->n;
3508   sendbuf = abt->buf[0];
3509   recvbuf = abt->buf[1];
3510   for (k = 0, j = 0; j < bn; j++) {
3511     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
3512   }
3513   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3514   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
3515   PetscCall(PetscBLASIntCast(cK, &ck));
3516   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3517   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
3518   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
3519   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3521   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3522   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3523   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3524   PetscFunctionReturn(PETSC_SUCCESS);
3525 }
3526 
3527 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3528 {
3529   Mat_MatTransMultDense *abt;
3530 
3531   PetscFunctionBegin;
3532   MatCheckProduct(C, 3);
3533   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3534   abt = (Mat_MatTransMultDense *)C->product->data;
3535   switch (abt->alg) {
3536   case 1:
3537     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
3538     break;
3539   default:
3540     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
3541     break;
3542   }
3543   PetscFunctionReturn(PETSC_SUCCESS);
3544 }
3545 
3546 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
3547 {
3548   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
3549 
3550   PetscFunctionBegin;
3551   PetscCall(MatDestroy(&ab->Ce));
3552   PetscCall(MatDestroy(&ab->Ae));
3553   PetscCall(MatDestroy(&ab->Be));
3554   PetscCall(PetscFree(ab));
3555   PetscFunctionReturn(PETSC_SUCCESS);
3556 }
3557 
3558 #if defined(PETSC_HAVE_ELEMENTAL)
3559 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3560 {
3561   Mat_MatMultDense *ab;
3562 
3563   PetscFunctionBegin;
3564   MatCheckProduct(C, 3);
3565   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
3566   ab = (Mat_MatMultDense *)C->product->data;
3567   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
3568   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
3569   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
3570   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
3571   PetscFunctionReturn(PETSC_SUCCESS);
3572 }
3573 
3574 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3575 {
3576   Mat               Ae, Be, Ce;
3577   Mat_MatMultDense *ab;
3578 
3579   PetscFunctionBegin;
3580   MatCheckProduct(C, 4);
3581   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3582   /* check local size of A and B */
3583   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
3584     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3585   }
3586 
3587   /* create elemental matrices Ae and Be */
3588   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
3589   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
3590   PetscCall(MatSetType(Ae, MATELEMENTAL));
3591   PetscCall(MatSetUp(Ae));
3592   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
3593 
3594   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
3595   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
3596   PetscCall(MatSetType(Be, MATELEMENTAL));
3597   PetscCall(MatSetUp(Be));
3598   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
3599 
3600   /* compute symbolic Ce = Ae*Be */
3601   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
3602   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
3603 
3604   /* setup C */
3605   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3606   PetscCall(MatSetType(C, MATDENSE));
3607   PetscCall(MatSetUp(C));
3608 
3609   /* create data structure for reuse Cdense */
3610   PetscCall(PetscNew(&ab));
3611   ab->Ae = Ae;
3612   ab->Be = Be;
3613   ab->Ce = Ce;
3614 
3615   C->product->data       = ab;
3616   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
3617   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3618   PetscFunctionReturn(PETSC_SUCCESS);
3619 }
3620 #endif
3621 
3622 #if defined(PETSC_HAVE_ELEMENTAL)
3623 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3624 {
3625   PetscFunctionBegin;
3626   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3627   C->ops->productsymbolic = MatProductSymbolic_AB;
3628   PetscFunctionReturn(PETSC_SUCCESS);
3629 }
3630 #endif
3631 
3632 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3633 {
3634   Mat_Product *product = C->product;
3635   Mat          A = product->A, B = product->B;
3636 
3637   PetscFunctionBegin;
3638   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3639     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3640   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3641   C->ops->productsymbolic          = MatProductSymbolic_AtB;
3642   PetscFunctionReturn(PETSC_SUCCESS);
3643 }
3644 
3645 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3646 {
3647   Mat_Product *product     = C->product;
3648   const char  *algTypes[2] = {"allgatherv", "cyclic"};
3649   PetscInt     alg, nalg = 2;
3650   PetscBool    flg = PETSC_FALSE;
3651 
3652   PetscFunctionBegin;
3653   /* Set default algorithm */
3654   alg = 0; /* default is allgatherv */
3655   PetscCall(PetscStrcmp(product->alg, "default", &flg));
3656   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3657 
3658   /* Get runtime option */
3659   if (product->api_user) {
3660     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
3661     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
3662     PetscOptionsEnd();
3663   } else {
3664     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
3665     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
3666     PetscOptionsEnd();
3667   }
3668   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3669 
3670   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3671   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3672   PetscFunctionReturn(PETSC_SUCCESS);
3673 }
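/*
  Sketch of how the ABt product above is requested from user code through the MatProduct interface;
  "cyclic" selects the second algorithm, the default being "allgatherv":

    Mat C;
    PetscCall(MatProductCreate(A, B, NULL, &C));
    PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
    PetscCall(MatProductSetAlgorithm(C, "cyclic"));
    PetscCall(MatProductSetFromOptions(C));
    PetscCall(MatProductSymbolic(C));
    PetscCall(MatProductNumeric(C));
*/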
3674 
3675 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3676 {
3677   Mat_Product *product = C->product;
3678 
3679   PetscFunctionBegin;
3680   switch (product->type) {
3681 #if defined(PETSC_HAVE_ELEMENTAL)
3682   case MATPRODUCT_AB:
3683     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
3684     break;
3685 #endif
3686   case MATPRODUCT_AtB:
3687     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3688     break;
3689   case MATPRODUCT_ABt:
3690     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3691     break;
3692   default:
3693     break;
3694   }
3695   PetscFunctionReturn(PETSC_SUCCESS);
3696 }
3697