
/*
   Basic functions for parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI dense matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
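
/*
   Minimal usage sketch for MatDenseGetLocalMatrix() (illustrative only; the sizes are
   arbitrary): the returned matrix is a borrowed reference and must not be destroyed.

     Mat      A, Aloc;
     PetscInt lda;

     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 100, NULL, &A));
     PetscCall(MatDenseGetLocalMatrix(A, &Aloc)); // the MATSEQDENSE block holding the local rows
     PetscCall(MatDenseGetLDA(Aloc, &lda));       // the full sequential dense API applies to Aloc
     PetscCall(MatDestroy(&A));
*/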

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(0);
}
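
/*
   Indexing note for MatShift_MPIDense(): the local block stores all global columns of the
   locally owned rows in column-major order with leading dimension lda, so the global
   diagonal entry (j,j) of an owned row j lives at local row j - rstart of column j, i.e. at
   offset (j - rstart) + j * lda. For example, with rstart = 2 and lda = 4, entry (3,3) sits
   at offset (3 - 2) + 3 * 4 = 13. rend is clipped to cmap->N so that rectangular matrices
   with more rows than columns do not index past the last column.
*/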

PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if defined(PETSC_HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif defined(PETSC_HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
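
/*
   Minimal usage sketch for the public entry point MatGetDiagonalBlock() (illustrative only):
   the returned block is owned by A, shares its storage (no values are copied), and must not
   be destroyed by the caller.

     Mat Adiag;
     PetscCall(MatGetDiagonalBlock(A, &Adiag)); // local-rows x local-rows dense block
*/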

PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
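
/*
   Off-process entries passed to MatSetValues_MPIDense() are stashed rather than communicated
   immediately; MatAssemblyBegin/End_MPIDense() below ship them to their owners. A minimal
   sketch of the calling pattern (illustrative only; grow, gcol, and val are placeholder
   variables):

     PetscCall(MatSetValues(A, 1, &grow, 1, &gcol, &val, ADD_VALUES)); // grow may be owned by another process
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));               // starts scattering the stash
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));                 // receives and applies stashed entries
*/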

PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if defined(PETSC_HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif defined(PETSC_HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(0);
}
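
/*
   Semantics of the three array-substitution paths above, as forwarded to the local block:
   PlaceArray() temporarily points the matrix at user memory and ResetArray() restores the
   original storage, while ReplaceArray() hands ownership of the new memory to the matrix.
   A minimal sketch (illustrative only; lda and N stand for the local leading dimension and
   the global column count):

     PetscScalar *tmp;
     PetscCall(PetscMalloc1((size_t)lda * N, &tmp));
     PetscCall(MatDensePlaceArray(A, tmp)); // A now reads and writes tmp
     // ... use A ...
     PetscCall(MatDenseResetArray(A));      // A points back at its original storage
     PetscCall(PetscFree(tmp));
*/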

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported: the submatrix must use the same row
     partitioning as the original matrix. Each index set should really be checked to confirm
     that it satisfies this restriction. */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, one column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
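
/*
   Sketch of extracting a submatrix through the public MatCreateSubMatrix() interface
   (illustrative only; per the restriction above, isrow must select locally owned rows;
   ncols_loc and first_col are placeholder values):

     PetscInt rstart, rend;
     IS       rows, cols;
     Mat      S;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &rows)); // all locally owned rows
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, ncols_loc, first_col, 1, &cols));
     PetscCall(MatCreateSubMatrix(A, rows, cols, MAT_INITIAL_MATRIX, &S));
     PetscCall(ISDestroy(&rows));
     PetscCall(ISDestroy(&cols));
*/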

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
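
/*
   Typical use of MatZeroRows() for imposing Dirichlet conditions (illustrative only; rows
   holds global indices and nrows is a placeholder count):

     PetscCall(MatZeroRows(A, nrows, rows, 1.0, x, b)); // zero the rows, set the diagonal to 1.0, and set b[rows[i]] = x[rows[i]]
*/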

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}
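
/*
   Communication pattern used by the four multiply routines above: each process owns a block
   of rows but all N columns, so y = A*x needs the entire input vector locally. MatMult and
   MatMultAdd therefore broadcast x into the sequential work vector lvec with the PetscSF and
   then perform a purely local dense multiply. MatMultTranspose and MatMultTransposeAdd run
   in the opposite direction: the local transpose-multiply produces a full-length partial
   result in lvec, and the SF reduce sums those partial results onto the owning processes.
*/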

PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, m = A->rmap->n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first process */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}
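
/*
   Effect of MatDiagonalScale_MPIDense(): A is overwritten with diag(ll) * A * diag(rr). The
   left scaling is purely local because ll conforms to the row layout, whereas the right
   scaling first broadcasts rr into the work vector lvec since every process stores entries
   of all N columns.
*/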

PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}
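
/*
   Norm choices above: the Frobenius norm sums |a_ij|^2 locally and takes the square root of
   the global sum; the 1-norm forms per-column absolute sums, adds them across processes, and
   takes the maximum over columns; the infinity norm takes the maximum over processes of the
   local maximum row sum, which is valid because every process holds complete rows.
*/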

PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
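
/*
   Usage sketch for the public MatTranspose() interface (illustrative only):

     Mat At;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); // new matrix with swapped row/column layouts
     PetscCall(MatTranspose(A, MAT_INPLACE_MATRIX, &A));  // A is replaced by its transpose
*/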

static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
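
/*
   Reduction pattern above: for NORM_2 the local per-column norms are squared so that the MPI
   sum accumulates sums of squares, and the square root is taken afterwards; the two MEAN
   variants are computed as global column sums divided by the global row count m.
*/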

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
1343   PetscCall(PetscLayoutSetUp(A->rmap));
1344   PetscCall(PetscLayoutSetUp(A->cmap));
1345   if (!d->A) {
1346     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1347     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1348   }
1349   PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
1350   PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1351   A->preallocated = PETSC_TRUE;
1352   A->assembled    = PETSC_TRUE;
1353   PetscFunctionReturn(0);
1354 }
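
/*
   Usage sketch (illustrative only, not part of the library): preallocating a
   MATMPIDENSECUDA matrix.  The sizes m and N and the device buffer d_data are
   hypothetical; passing NULL for d_data lets PETSc allocate the GPU storage,
   while a user buffer is assumed to be column major with the local row count
   as its leading dimension.

     Mat          A;
     PetscScalar *d_data = NULL; // e.g. obtained from cudaMalloc()
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, m, PETSC_DECIDE, PETSC_DETERMINE, N));
     PetscCall(MatSetType(A, MATMPIDENSECUDA));
     PetscCall(MatMPIDenseCUDASetPreallocation(A, d_data));
*/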
1355 #endif
1356 
1357 #if defined(PETSC_HAVE_HIP)
1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha)
1359 {
1360   PetscScalar *da;
1361   PetscInt     lda;
1362 
1363   PetscFunctionBegin;
1364   PetscCall(MatDenseHIPGetArray(A, &da));
1365   PetscCall(MatDenseGetLDA(A, &lda));
1366   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1367   PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1368   PetscCall(MatDenseHIPRestoreArray(A, &da));
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1373 {
1374   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1375   PetscInt      lda;
1376 
1377   PetscFunctionBegin;
1378   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1379   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1380   if (!a->cvec) {
1381     PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1382     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1383   }
1384   a->vecinuse = col + 1;
1385   PetscCall(MatDenseGetLDA(a->A, &lda));
1386   PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1387   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1388   *v = a->cvec;
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1393 {
1394   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1395 
1396   PetscFunctionBegin;
1397   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1398   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1399   a->vecinuse = 0;
1400   PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1401   PetscCall(VecHIPResetArray(a->cvec));
1402   if (v) *v = NULL;
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1407 {
1408   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1409   PetscInt      lda;
1410 
1411   PetscFunctionBegin;
1412   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1413   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1414   if (!a->cvec) {
1415     PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1416     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1417   }
1418   a->vecinuse = col + 1;
1419   PetscCall(MatDenseGetLDA(a->A, &lda));
1420   PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse));
1421   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1422   PetscCall(VecLockReadPush(a->cvec));
1423   *v = a->cvec;
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1428 {
1429   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1430 
1431   PetscFunctionBegin;
1432   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1433   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1434   a->vecinuse = 0;
1435   PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse));
1436   PetscCall(VecLockReadPop(a->cvec));
1437   PetscCall(VecHIPResetArray(a->cvec));
1438   if (v) *v = NULL;
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1443 {
1444   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1445   PetscInt      lda;
1446 
1447   PetscFunctionBegin;
1448   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1449   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1450   if (!a->cvec) {
1451     PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1452     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1453   }
1454   a->vecinuse = col + 1;
1455   PetscCall(MatDenseGetLDA(a->A, &lda));
1456   PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1457   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1458   *v = a->cvec;
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1463 {
1464   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1465 
1466   PetscFunctionBegin;
1467   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1468   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1469   a->vecinuse = 0;
1470   PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1471   PetscCall(VecHIPResetArray(a->cvec));
1472   if (v) *v = NULL;
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1477 {
1478   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1479 
1480   PetscFunctionBegin;
1481   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1482   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1483   PetscCall(MatDenseHIPPlaceArray(l->A, a));
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A)
1488 {
1489   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1490 
1491   PetscFunctionBegin;
1492   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1493   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1494   PetscCall(MatDenseHIPResetArray(l->A));
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1499 {
1500   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1501 
1502   PetscFunctionBegin;
1503   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1504   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1505   PetscCall(MatDenseHIPReplaceArray(l->A, a));
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1510 {
1511   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1512 
1513   PetscFunctionBegin;
1514   PetscCall(MatDenseHIPGetArrayWrite(l->A, a));
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1519 {
1520   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1521 
1522   PetscFunctionBegin;
1523   PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a));
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1528 {
1529   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1530 
1531   PetscFunctionBegin;
1532   PetscCall(MatDenseHIPGetArrayRead(l->A, a));
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1537 {
1538   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1539 
1540   PetscFunctionBegin;
1541   PetscCall(MatDenseHIPRestoreArrayRead(l->A, a));
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a)
1546 {
1547   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1548 
1549   PetscFunctionBegin;
1550   PetscCall(MatDenseHIPGetArray(l->A, a));
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a)
1555 {
1556   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1557 
1558   PetscFunctionBegin;
1559   PetscCall(MatDenseHIPRestoreArray(l->A, a));
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1564 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1565 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1566 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1567 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1568 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1569 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1570 
1571 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind)
1572 {
1573   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1574 
1575   PetscFunctionBegin;
1576   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1577   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1578   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1579   mat->boundtocpu = bind;
1580   if (!bind) {
1581     PetscBool iship;
1582 
1583     PetscCall(PetscFree(mat->defaultrandtype));
1584     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1585     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iship));
1586     if (!iship) PetscCall(VecDestroy(&d->cvec));
1587     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iship));
1588     if (!iship) PetscCall(MatDestroy(&d->cmat));
1589     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP));
1590     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP));
1591     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP));
1592     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP));
1593     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP));
1594     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP));
1595     mat->ops->shift = MatShift_MPIDenseHIP;
1596   } else {
1597     PetscCall(PetscFree(mat->defaultrandtype));
1598     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1599     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1600     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1601     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1602     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1603     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1604     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1605     mat->ops->shift = MatShift_MPIDense;
1606   }
1607   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data)
1612 {
1613   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1614   PetscBool     iship;
1615 
1616   PetscFunctionBegin;
1617   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1618   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
1619   if (!iship) PetscFunctionReturn(0);
1620   PetscCall(PetscLayoutSetUp(A->rmap));
1621   PetscCall(PetscLayoutSetUp(A->cmap));
1622   if (!d->A) {
1623     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1624     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
1625     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1626   }
1627   PetscCall(MatSetType(d->A, MATSEQDENSEHIP));
1628   PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data));
1629   A->preallocated = PETSC_TRUE;
1630   A->assembled    = PETSC_TRUE;
1631   PetscFunctionReturn(0);
1632 }
1633 #endif
1634 
1635 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1636 {
1637   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1638 
1639   PetscFunctionBegin;
1640   PetscCall(MatSetRandom(d->A, rctx));
1641 #if defined(PETSC_HAVE_DEVICE)
1642   x->offloadmask = d->A->offloadmask;
1643 #endif
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1648 {
1649   PetscFunctionBegin;
1650   *missing = PETSC_FALSE;
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1655 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1656 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1657 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1658 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1659 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1660 
1661 /* -------------------------------------------------------------------*/
1662 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1663                                        MatGetRow_MPIDense,
1664                                        MatRestoreRow_MPIDense,
1665                                        MatMult_MPIDense,
1666                                        /*  4*/ MatMultAdd_MPIDense,
1667                                        MatMultTranspose_MPIDense,
1668                                        MatMultTransposeAdd_MPIDense,
1669                                        NULL,
1670                                        NULL,
1671                                        NULL,
1672                                        /* 10*/ NULL,
1673                                        NULL,
1674                                        NULL,
1675                                        NULL,
1676                                        MatTranspose_MPIDense,
1677                                        /* 15*/ MatGetInfo_MPIDense,
1678                                        MatEqual_MPIDense,
1679                                        MatGetDiagonal_MPIDense,
1680                                        MatDiagonalScale_MPIDense,
1681                                        MatNorm_MPIDense,
1682                                        /* 20*/ MatAssemblyBegin_MPIDense,
1683                                        MatAssemblyEnd_MPIDense,
1684                                        MatSetOption_MPIDense,
1685                                        MatZeroEntries_MPIDense,
1686                                        /* 24*/ MatZeroRows_MPIDense,
1687                                        NULL,
1688                                        NULL,
1689                                        NULL,
1690                                        NULL,
1691                                        /* 29*/ MatSetUp_MPIDense,
1692                                        NULL,
1693                                        NULL,
1694                                        MatGetDiagonalBlock_MPIDense,
1695                                        NULL,
1696                                        /* 34*/ MatDuplicate_MPIDense,
1697                                        NULL,
1698                                        NULL,
1699                                        NULL,
1700                                        NULL,
1701                                        /* 39*/ MatAXPY_MPIDense,
1702                                        MatCreateSubMatrices_MPIDense,
1703                                        NULL,
1704                                        MatGetValues_MPIDense,
1705                                        MatCopy_MPIDense,
1706                                        /* 44*/ NULL,
1707                                        MatScale_MPIDense,
1708                                        MatShift_MPIDense,
1709                                        NULL,
1710                                        NULL,
1711                                        /* 49*/ MatSetRandom_MPIDense,
1712                                        NULL,
1713                                        NULL,
1714                                        NULL,
1715                                        NULL,
1716                                        /* 54*/ NULL,
1717                                        NULL,
1718                                        NULL,
1719                                        NULL,
1720                                        NULL,
1721                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1722                                        MatDestroy_MPIDense,
1723                                        MatView_MPIDense,
1724                                        NULL,
1725                                        NULL,
1726                                        /* 64*/ NULL,
1727                                        NULL,
1728                                        NULL,
1729                                        NULL,
1730                                        NULL,
1731                                        /* 69*/ NULL,
1732                                        NULL,
1733                                        NULL,
1734                                        NULL,
1735                                        NULL,
1736                                        /* 74*/ NULL,
1737                                        NULL,
1738                                        NULL,
1739                                        NULL,
1740                                        NULL,
1741                                        /* 79*/ NULL,
1742                                        NULL,
1743                                        NULL,
1744                                        NULL,
1745                                        /* 83*/ MatLoad_MPIDense,
1746                                        NULL,
1747                                        NULL,
1748                                        NULL,
1749                                        NULL,
1750                                        NULL,
1751                                        /* 89*/ NULL,
1752                                        NULL,
1753                                        NULL,
1754                                        NULL,
1755                                        NULL,
1756                                        /* 94*/ NULL,
1757                                        NULL,
1758                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1759                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1760                                        NULL,
1761                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1762                                        NULL,
1763                                        NULL,
1764                                        MatConjugate_MPIDense,
1765                                        NULL,
1766                                        /*104*/ NULL,
1767                                        MatRealPart_MPIDense,
1768                                        MatImaginaryPart_MPIDense,
1769                                        NULL,
1770                                        NULL,
1771                                        /*109*/ NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        MatGetColumnVector_MPIDense,
1775                                        MatMissingDiagonal_MPIDense,
1776                                        /*114*/ NULL,
1777                                        NULL,
1778                                        NULL,
1779                                        NULL,
1780                                        NULL,
1781                                        /*119*/ NULL,
1782                                        NULL,
1783                                        NULL,
1784                                        NULL,
1785                                        NULL,
1786                                        /*124*/ NULL,
1787                                        MatGetColumnReductions_MPIDense,
1788                                        NULL,
1789                                        NULL,
1790                                        NULL,
1791                                        /*129*/ NULL,
1792                                        NULL,
1793                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1794                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1795                                        NULL,
1796                                        /*134*/ NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        NULL,
1801                                        /*139*/ NULL,
1802                                        NULL,
1803                                        NULL,
1804                                        NULL,
1805                                        NULL,
1806                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1807                                        /*145*/ NULL,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        /*150*/ NULL};
1813 
1814 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1815 {
1816   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1817   MatType       mtype = MATSEQDENSE;
1818 
1819   PetscFunctionBegin;
1820   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1821   PetscCall(PetscLayoutSetUp(mat->rmap));
1822   PetscCall(PetscLayoutSetUp(mat->cmap));
1823   if (!a->A) {
1824     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1825     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1826   }
1827 #if defined(PETSC_HAVE_CUDA)
1828   PetscBool iscuda;
1829   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1830   if (iscuda) mtype = MATSEQDENSECUDA;
1831 #endif
1832 #if defined(PETSC_HAVE_HIP)
1833   PetscBool iship;
1834   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1835   if (iship) mtype = MATSEQDENSEHIP;
1836 #endif
1837   PetscCall(MatSetType(a->A, mtype));
1838   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1839 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1840   mat->offloadmask = a->A->offloadmask;
1841 #endif
1842   mat->preallocated = PETSC_TRUE;
1843   mat->assembled    = PETSC_TRUE;
1844   PetscFunctionReturn(0);
1845 }
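
/*
   Usage sketch (illustrative only): applications reach the routine above
   through the public MatMPIDenseSetPreallocation(); the local sizes m and n
   are hypothetical, and passing NULL for the data argument lets PETSc
   allocate the local column-major array.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
*/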
1846 
1847 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1848 {
1849   Mat B, C;
1850 
1851   PetscFunctionBegin;
1852   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1853   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1854   PetscCall(MatDestroy(&C));
1855   if (reuse == MAT_REUSE_MATRIX) {
1856     C = *newmat;
1857   } else C = NULL;
1858   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1859   PetscCall(MatDestroy(&B));
1860   if (reuse == MAT_INPLACE_MATRIX) {
1861     PetscCall(MatHeaderReplace(A, &C));
1862   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1867 {
1868   Mat B, C;
1869 
1870   PetscFunctionBegin;
1871   PetscCall(MatDenseGetLocalMatrix(A, &C));
1872   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1873   if (reuse == MAT_REUSE_MATRIX) {
1874     C = *newmat;
1875   } else C = NULL;
1876   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1877   PetscCall(MatDestroy(&B));
1878   if (reuse == MAT_INPLACE_MATRIX) {
1879     PetscCall(MatHeaderReplace(A, &C));
1880   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1881   PetscFunctionReturn(0);
1882 }
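
/*
   Usage sketch (illustrative only): the two converters above are normally
   reached through MatConvert() rather than called directly; Adense is a
   hypothetical MATMPIDENSE matrix.

     Mat Aaij;
     PetscCall(MatConvert(Adense, MATMPIAIJ, MAT_INITIAL_MATRIX, &Aaij));
     // or, converting in place:
     PetscCall(MatConvert(Adense, MATMPIAIJ, MAT_INPLACE_MATRIX, &Adense));
*/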
1883 
1884 #if defined(PETSC_HAVE_ELEMENTAL)
1885 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1886 {
1887   Mat          mat_elemental;
1888   PetscScalar *v;
1889   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1890 
1891   PetscFunctionBegin;
1892   if (reuse == MAT_REUSE_MATRIX) {
1893     mat_elemental = *newmat;
1894     PetscCall(MatZeroEntries(*newmat));
1895   } else {
1896     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1897     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1898     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1899     PetscCall(MatSetUp(mat_elemental));
1900     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1901   }
1902 
1903   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1904   for (i = 0; i < N; i++) cols[i] = i;
1905   for (i = 0; i < m; i++) rows[i] = rstart + i;
1906 
1907   /* the PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1908   PetscCall(MatDenseGetArray(A, &v));
1909   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1910   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1911   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1912   PetscCall(MatDenseRestoreArray(A, &v));
1913   PetscCall(PetscFree2(rows, cols));
1914 
1915   if (reuse == MAT_INPLACE_MATRIX) {
1916     PetscCall(MatHeaderReplace(A, &mat_elemental));
1917   } else {
1918     *newmat = mat_elemental;
1919   }
1920   PetscFunctionReturn(0);
1921 }
1922 #endif
1923 
1924 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1925 {
1926   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1927 
1928   PetscFunctionBegin;
1929   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1934 {
1935   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1936 
1937   PetscFunctionBegin;
1938   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1939   PetscFunctionReturn(0);
1940 }
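
/*
   Usage sketch (illustrative only): a column of the local array is accessed
   through the public MatDenseGetColumn()/MatDenseRestoreColumn() pair, which
   dispatch to the routines above; A is a hypothetical MATMPIDENSE matrix.

     PetscScalar *col;
     PetscCall(MatDenseGetColumn(A, 0, &col));
     // col[i] addresses local row i of global column 0
     PetscCall(MatDenseRestoreColumn(A, &col));
*/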
1941 
1942 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1943 {
1944   Mat_MPIDense *mat;
1945   PetscInt      m, nloc, N;
1946 
1947   PetscFunctionBegin;
1948   PetscCall(MatGetSize(inmat, &m, &N));
1949   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1950   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1951     PetscInt sum;
1952 
1953     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1954     /* Check sum(n) = N */
1955     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1956     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1957 
1958     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1959     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1960   }
1961 
1962   /* numeric phase */
1963   mat = (Mat_MPIDense *)(*outmat)->data;
1964   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1965   PetscFunctionReturn(0);
1966 }
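
/*
   Usage sketch (illustrative only): stacking one sequential dense matrix per
   rank into a single parallel matrix; seq is a hypothetical m_local-by-N
   MATSEQDENSE matrix on each rank, and PETSC_DECIDE lets PETSc choose the
   column ownership of the result.

     Mat mpi;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seq, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpi));
*/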
1967 
1968 #if defined(PETSC_HAVE_CUDA)
1969 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1970 {
1971   Mat           B;
1972   Mat_MPIDense *m;
1973 
1974   PetscFunctionBegin;
1975   if (reuse == MAT_INITIAL_MATRIX) {
1976     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1977   } else if (reuse == MAT_REUSE_MATRIX) {
1978     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1979   }
1980 
1981   B = *newmat;
1982   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
1983   PetscCall(PetscFree(B->defaultvectype));
1984   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
1985   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
1986   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
1987   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
1988   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
1989   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
1990   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
1991   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
1992   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
1993   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
1994   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
1995   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
1996   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
1997   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
1998   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
1999   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
2000   m = (Mat_MPIDense *)B->data;
2001   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2002   B->ops->bindtocpu = NULL;
2003   B->offloadmask    = PETSC_OFFLOAD_CPU;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2008 {
2009   Mat           B;
2010   Mat_MPIDense *m;
2011 
2012   PetscFunctionBegin;
2013   if (reuse == MAT_INITIAL_MATRIX) {
2014     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2015   } else if (reuse == MAT_REUSE_MATRIX) {
2016     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2017   }
2018 
2019   B = *newmat;
2020   PetscCall(PetscFree(B->defaultvectype));
2021   PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
2022   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
2023   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
2024   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2025   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2026   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2027   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2028   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
2029   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
2030   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
2031   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
2032   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
2033   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
2034   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
2035   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
2036   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
2037   m = (Mat_MPIDense *)(B->data);
2038   if (m->A) {
2039     PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
2040     B->offloadmask = PETSC_OFFLOAD_BOTH;
2041   } else {
2042     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2043   }
2044   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));
2045 
2046   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
2047   PetscFunctionReturn(0);
2048 }
2049 #endif
2050 
2051 #if defined(PETSC_HAVE_HIP)
2052 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2053 {
2054   Mat           B;
2055   Mat_MPIDense *m;
2056 
2057   PetscFunctionBegin;
2058   if (reuse == MAT_INITIAL_MATRIX) {
2059     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2060   } else if (reuse == MAT_REUSE_MATRIX) {
2061     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2062   }
2063 
2064   B = *newmat;
2065   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE));
2066   PetscCall(PetscFree(B->defaultvectype));
2067   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
2068   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
2069   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL));
2070   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
2071   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
2072   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
2073   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
2074   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL));
2075   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL));
2076   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL));
2077   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL));
2078   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL));
2079   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL));
2080   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL));
2081   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL));
2082   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL));
2083   m = (Mat_MPIDense *)B->data;
2084   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2085   B->ops->bindtocpu = NULL;
2086   B->offloadmask    = PETSC_OFFLOAD_CPU;
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2091 {
2092   Mat           B;
2093   Mat_MPIDense *m;
2094 
2095   PetscFunctionBegin;
2096   if (reuse == MAT_INITIAL_MATRIX) {
2097     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2098   } else if (reuse == MAT_REUSE_MATRIX) {
2099     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2100   }
2101 
2102   B = *newmat;
2103   PetscCall(PetscFree(B->defaultvectype));
2104   PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype));
2105   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP));
2106   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense));
2107   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2108   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2109   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2110   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2111   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP));
2112   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP));
2113   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP));
2114   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP));
2115   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP));
2116   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP));
2117   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP));
2118   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP));
2119   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP));
2120   m = (Mat_MPIDense *)(B->data);
2121   if (m->A) {
2122     PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A));
2123     B->offloadmask = PETSC_OFFLOAD_BOTH;
2124   } else {
2125     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2126   }
2127   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE));
2128 
2129   B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP;
2130   PetscFunctionReturn(0);
2131 }
2132 #endif
2133 
2134 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2135 {
2136   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2137   PetscInt      lda;
2138 
2139   PetscFunctionBegin;
2140   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2141   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2142   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2143   a->vecinuse = col + 1;
2144   PetscCall(MatDenseGetLDA(a->A, &lda));
2145   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
2146   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2147   *v = a->cvec;
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2152 {
2153   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2154 
2155   PetscFunctionBegin;
2156   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2157   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
2158   a->vecinuse = 0;
2159   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
2160   PetscCall(VecResetArray(a->cvec));
2161   if (v) *v = NULL;
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2166 {
2167   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2168   PetscInt      lda;
2169 
2170   PetscFunctionBegin;
2171   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2172   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2173   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2174   a->vecinuse = col + 1;
2175   PetscCall(MatDenseGetLDA(a->A, &lda));
2176   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
2177   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2178   PetscCall(VecLockReadPush(a->cvec));
2179   *v = a->cvec;
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2184 {
2185   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2186 
2187   PetscFunctionBegin;
2188   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2189   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2190   a->vecinuse = 0;
2191   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
2192   PetscCall(VecLockReadPop(a->cvec));
2193   PetscCall(VecResetArray(a->cvec));
2194   if (v) *v = NULL;
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2199 {
2200   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2201   PetscInt      lda;
2202 
2203   PetscFunctionBegin;
2204   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2205   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2206   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2207   a->vecinuse = col + 1;
2208   PetscCall(MatDenseGetLDA(a->A, &lda));
2209   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2210   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2211   *v = a->cvec;
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2216 {
2217   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2218 
2219   PetscFunctionBegin;
2220   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2221   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2222   a->vecinuse = 0;
2223   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2224   PetscCall(VecResetArray(a->cvec));
2225   if (v) *v = NULL;
2226   PetscFunctionReturn(0);
2227 }
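
/*
   Usage sketch (illustrative only): the read-only column access pairs
   MatDenseGetColumnVecRead() with MatDenseRestoreColumnVecRead(); the write
   and read-write variants above follow the same pattern.  A and col are
   hypothetical.

     Vec       v;
     PetscReal nrm;
     PetscCall(MatDenseGetColumnVecRead(A, col, &v));
     PetscCall(VecNorm(v, NORM_2, &nrm));
     PetscCall(MatDenseRestoreColumnVecRead(A, col, &v));
*/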
2228 
2229 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
2230 {
2231   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2232   Mat_MPIDense *c;
2233   MPI_Comm      comm;
2234   PetscInt      pbegin, pend;
2235 
2236   PetscFunctionBegin;
2237   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2238   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2239   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2240   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
2241   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
2242   if (!a->cmat) {
2243     PetscCall(MatCreate(comm, &a->cmat));
2244     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
2245     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
2246     else {
2247       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2248       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2249       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2250     }
2251     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2252     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2253   } else {
2254     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
2255     if (same && a->cmat->rmap->N != A->rmap->N) {
2256       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
2257       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2258     }
2259     if (!same) {
2260       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
2261       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
2262       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2263       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2264       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2265     }
2266     if (cend - cbegin != a->cmat->cmap->N) {
2267       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
2268       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
2269       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2270       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2271     }
2272   }
2273   c = (Mat_MPIDense *)a->cmat->data;
2274   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2275   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
2276 
2277   a->cmat->preallocated = PETSC_TRUE;
2278   a->cmat->assembled    = PETSC_TRUE;
2279 #if defined(PETSC_HAVE_DEVICE)
2280   a->cmat->offloadmask = c->A->offloadmask;
2281 #endif
2282   a->matinuse = cbegin + 1;
2283   *v          = a->cmat;
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
2288 {
2289   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2290   Mat_MPIDense *c;
2291 
2292   PetscFunctionBegin;
2293   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
2294   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
2295   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
2296   a->matinuse = 0;
2297   c           = (Mat_MPIDense *)a->cmat->data;
2298   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
2299   if (v) *v = NULL;
2300 #if defined(PETSC_HAVE_DEVICE)
2301   A->offloadmask = a->A->offloadmask;
2302 #endif
2303   PetscFunctionReturn(0);
2304 }
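
/*
   Usage sketch (illustrative only): viewing a contiguous block of A as a
   matrix without copying; the index bounds are hypothetical.  As the checks
   above enforce, the view must be restored before the next get/place call
   on A.

     Mat sub;
     PetscCall(MatDenseGetSubMatrix(A, rbegin, rend, cbegin, cend, &sub));
     PetscCall(MatScale(sub, 2.0)); // operates directly on A's storage
     PetscCall(MatDenseRestoreSubMatrix(A, &sub));
*/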
2305 
2306 /*MC
2307    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
2308 
2309    Options Database Keys:
2310 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
2311 
2312   Level: beginner
2313 
2314 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
2315 M*/
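
/*
   Usage sketch (illustrative only): the usual way to obtain a MATMPIDENSE
   matrix is MatCreateDense(), which selects this type on communicators with
   more than one rank; M and N are hypothetical global sizes.

     Mat A;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
*/
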
2316 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
2317 {
2318   Mat_MPIDense *a;
2319 
2320   PetscFunctionBegin;
2321   PetscCall(PetscNew(&a));
2322   mat->data = (void *)a;
2323   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
2324 
2325   mat->insertmode = NOT_SET_VALUES;
2326 
2327   /* build cache for off-processor entries formed during MatSetValues() */
2328   a->donotstash = PETSC_FALSE;
2329 
2330   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
2331 
2332   /* data structures used for matrix-vector products */
2333   a->lvec        = NULL;
2334   a->Mvctx       = NULL;
2335   a->roworiented = PETSC_TRUE;
2336 
2337   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
2338   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
2339   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
2340   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
2341   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
2342   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
2343   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
2344   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
2345   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
2346   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
2347   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
2348   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
2349   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
2350   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
2351   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
2352   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
2353   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
2354   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
2355   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
2356   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
2357   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
2358 #if defined(PETSC_HAVE_ELEMENTAL)
2359   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
2360 #endif
2361 #if defined(PETSC_HAVE_SCALAPACK)
2362   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
2363 #endif
2364 #if defined(PETSC_HAVE_CUDA)
2365   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
2366 #endif
2367   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
2368   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2369   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2370 #if defined(PETSC_HAVE_CUDA)
2371   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2372   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2373 #endif
2374 #if defined(PETSC_HAVE_HIP)
2375   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
2376   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2377   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2378 #endif
2379   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
2380   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
2381   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 /*MC
2386    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2387 
2388    Options Database Keys:
2389 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
2390 
2391   Level: beginner
2392 
2393 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2394 M*/
2395 #if defined(PETSC_HAVE_CUDA)
2396   #include <petsc/private/deviceimpl.h>
2397 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2398 {
2399   PetscFunctionBegin;
2400   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
2401   PetscCall(MatCreate_MPIDense(B));
2402   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
2403   PetscFunctionReturn(0);
2404 }
2405 #endif
2406 
2407 /*MC
2408    MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs.
2409 
2410    Options Database Keys:
2411 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()`
2412 
2413   Level: beginner
2414 
2415 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSEHIP`, `MATMPIDENSECUDA`
2416 M*/
2417 #if defined(PETSC_HAVE_HIP)
2418   #include <petsc/private/deviceimpl.h>
2419 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B)
2420 {
2421   PetscFunctionBegin;
2422   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
2423   PetscCall(MatCreate_MPIDense(B));
2424   PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B));
2425   PetscFunctionReturn(0);
2426 }
2427 #endif
2428 
2429 /*MC
2430    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2431 
2432    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2433    and `MATMPIDENSE` otherwise.
2434 
2435    Options Database Keys:
2436 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
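
   For example, a matrix set up as in the sketch below (the sizes are illustrative) becomes `MATDENSE` when the program is run with -mat_type dense:
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetFromOptions(A));
.ve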
2437 
2438   Level: beginner
2439 
2440 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
2441 M*/
2442 
2443 /*MC
2444    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2447 
2448    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2449    and `MATMPIDENSECUDA` otherwise.
2450 
2451    Options Database Keys:
2452 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
2453 
2454   Level: beginner
2455 
2456 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2457 M*/
2458 
2459 /*MC
2460    MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs.
2461 
2462    This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator,
2463    and `MATMPIDENSEHIP` otherwise.
2464 
2465    Options Database Keys:
2466 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()`
2467 
2468   Level: beginner
2469 
2470 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2471 M*/
2472 
2473 /*@C
2474    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2475 
2476    Collective
2477 
2478    Input Parameters:
2479 +  B - the matrix
2480 -  data - optional location of matrix data.  Set data=NULL for PETSc
2481    to control all matrix memory allocation.
2482 
2483    Notes:
2484    The dense format is fully compatible with standard Fortran 77
2485    storage by columns.
2486 
2487    The data input variable is intended primarily for Fortran programmers
2488    who wish to allocate their own matrix memory space.  Most users should
2489    set data=NULL.
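
   Example usage, a minimal sketch with illustrative sizes:
.vb
   Mat B;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, PETSC_DECIDE, PETSC_DECIDE, 100, 50));
   PetscCall(MatSetType(B, MATMPIDENSE));
   PetscCall(MatMPIDenseSetPreallocation(B, NULL)); /* let PETSc allocate the storage */
.ve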
2490 
2491    Level: intermediate
2492 
2493 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2494 @*/
2495 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
2496 {
2497   PetscFunctionBegin;
2498   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2499   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 /*@
2504    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2505    array provided by the user. This is useful to avoid copying an array
2506    into a matrix
2507 
2508    Not Collective
2509 
2510    Input Parameters:
2511 +  mat - the matrix
2512 -  array - the array in column major order
2513 
2514    Note:
2515    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2516    freed when the matrix is destroyed.
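
   A minimal usage sketch (assumes `mat` is a `MATMPIDENSE` matrix; `m` and `N` stand for its local row and global column counts):
.vb
   PetscScalar *v;
   PetscCall(PetscMalloc1(m * N, &v));
   PetscCall(MatDensePlaceArray(mat, v));
   /* ... use mat with the user-provided storage ... */
   PetscCall(MatDenseResetArray(mat));
   PetscCall(PetscFree(v));
.ve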
2517 
2518    Level: developer
2519 
2520 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2521           `MatDenseReplaceArray()`
2522 @*/
2523 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
2524 {
2525   PetscFunctionBegin;
2526   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2527   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2528   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2529 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2530   mat->offloadmask = PETSC_OFFLOAD_CPU;
2531 #endif
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 /*@
2536    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
2537 
2538    Not Collective
2539 
2540    Input Parameter:
2541 .  mat - the matrix
2542 
2543    Note:
2544    You can only call this after a call to `MatDensePlaceArray()`
2545 
2546    Level: developer
2547 
2548 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2549 @*/
2550 PetscErrorCode MatDenseResetArray(Mat mat)
2551 {
2552   PetscFunctionBegin;
2553   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2554   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2555   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 /*@
2560    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2561    array provided by the user. This is useful to avoid copying an array
2562    into a matrix
2563 
2564    Not Collective
2565 
2566    Input Parameters:
2567 +  mat - the matrix
2568 -  array - the array in column major order
2569 
2570    Note:
2571    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2572    freed by the user. It will be freed when the matrix is destroyed.
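
   A minimal usage sketch (`mat`, `m`, and `N` are illustrative; the array must come from `PetscMalloc()`):
.vb
   PetscScalar *v;
   PetscCall(PetscMalloc1(m * N, &v));
   PetscCall(MatDenseReplaceArray(mat, v)); /* mat now owns v; do not free it */
.ve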
2573 
2574    Level: developer
2575 
2576 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2577 @*/
2578 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2579 {
2580   PetscFunctionBegin;
2581   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2582   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2583   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2584 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2585   mat->offloadmask = PETSC_OFFLOAD_CPU;
2586 #endif
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #if defined(PETSC_HAVE_CUDA)
2591 /*@C
2592    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2593    array provided by the user. This is useful to avoid copying an array
2594    into a matrix
2595 
2596    Not Collective
2597 
2598    Input Parameters:
2599 +  mat - the matrix
2600 -  array - the array in column major order
2601 
2602    Note:
2603    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2604    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
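
   A minimal usage sketch (assumes `mat` is a `MATMPIDENSECUDA` matrix; `lda` and `N` stand for the local leading dimension and global column count):
.vb
   PetscScalar *d_v;
   cudaMalloc((void **)&d_v, lda * N * sizeof(PetscScalar));
   PetscCall(MatDenseCUDAPlaceArray(mat, d_v));
   /* ... use mat with the device storage ... */
   PetscCall(MatDenseCUDAResetArray(mat));
   cudaFree(d_v);
.ve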
2605 
2606    Level: developer
2607 
2608 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2609 @*/
2610 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2611 {
2612   PetscFunctionBegin;
2613   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2614   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2615   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2616   mat->offloadmask = PETSC_OFFLOAD_GPU;
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 /*@C
2621    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`
2622 
2623    Not Collective
2624 
2625    Input Parameter:
2626 .  mat - the matrix
2627 
2628    Note:
2629    You can only call this after a call to `MatDenseCUDAPlaceArray()`
2630 
2631    Level: developer
2632 
2633 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2634 @*/
2635 PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2636 {
2637   PetscFunctionBegin;
2638   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2639   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2640   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2641   PetscFunctionReturn(0);
2642 }
2643 
2644 /*@C
2645    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2646    array provided by the user. This is useful to avoid copying an array
2647    into a matrix
2648 
2649    Not Collective
2650 
2651    Input Parameters:
2652 +  mat - the matrix
2653 -  array - the array in column major order
2654 
2655    Note:
2656    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2657    The memory passed in CANNOT be freed by the user. It will be freed
2658    when the matrix is destroyed. The array should respect the matrix leading dimension.
2659 
2660    Level: developer
2661 
2662 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2663 @*/
2664 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2665 {
2666   PetscFunctionBegin;
2667   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2668   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2669   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2670   mat->offloadmask = PETSC_OFFLOAD_GPU;
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 /*@C
2675    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2676 
2677    Not Collective
2678 
2679    Input Parameter:
2680 .  A - the matrix
2681 
2682    Output Parameter:
2683 .  array - the GPU array in column major order
2684 
2685    Notes:
2686    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
2687 
2688    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
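
   A minimal usage sketch (assumes `A` is an assembled `MATDENSECUDA` matrix):
.vb
   PetscScalar *d_a;
   PetscCall(MatDenseCUDAGetArrayWrite(A, &d_a));
   /* overwrite d_a with a CUDA kernel or cuBLAS call; its prior contents are undefined */
   PetscCall(MatDenseCUDARestoreArrayWrite(A, &d_a));
.ve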
2689 
2690    Level: developer
2691 
2692 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2693 @*/
2694 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2695 {
2696   PetscFunctionBegin;
2697   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2698   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2699   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 /*@C
2704    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2705 
2706    Not Collective
2707 
2708    Input Parameters:
2709 +  A - the matrix
2710 -  array - the GPU array in column major order
2711 
2712    Level: developer
2713 
2714 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2715 @*/
2716 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2717 {
2718   PetscFunctionBegin;
2719   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2720   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2721   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2722   A->offloadmask = PETSC_OFFLOAD_GPU;
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 /*@C
2727    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2728 
2729    Not Collective
2730 
2731    Input Parameter:
2732 .  A - the matrix
2733 
2734    Output Parameter:
2735 .  array - the GPU array in column major order
2736 
2737    Note:
2738    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2739 
2740    Level: developer
2741 
2742 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2743 @*/
2744 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2745 {
2746   PetscFunctionBegin;
2747   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2748   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 /*@C
2753    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2754 
2755    Not Collective
2756 
2757    Input Parameters:
2758 +  A - the matrix
2759 -  array - the GPU array in column major order
2760 
2764    Level: developer
2765 
2766 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2767 @*/
2768 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2769 {
2770   PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1); /* match the validation done by the other accessors */
2771   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 /*@C
2776    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2777 
2778    Not Collective
2779 
2780    Input Parameter:
2781 .  A - the matrix
2782 
2783    Output Parameter:
2784 .  array - the GPU array in column major order
2785 
2786    Note:
2787    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2788 
2789    Level: developer
2790 
2791 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2792 @*/
2793 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2794 {
2795   PetscFunctionBegin;
2796   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2797   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2798   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 /*@C
2803    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2804 
2805    Not Collective
2806 
2807    Input Parameters:
2808 +  A - the matrix
2809 -  array - the GPU array in column major order
2810 
2811    Level: developer
2812 
2813 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2814 @*/
2815 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2816 {
2817   PetscFunctionBegin;
2818   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2819   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2820   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2821   A->offloadmask = PETSC_OFFLOAD_GPU;
2822   PetscFunctionReturn(0);
2823 }
2824 #endif
2825 
2826 #if defined(PETSC_HAVE_HIP)
2827 /*@C
2828    MatDenseHIPPlaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
2829    array provided by the user. This is useful to avoid copying an array
2830    into a matrix
2831 
2832    Not Collective
2833 
2834    Input Parameters:
2835 +  mat - the matrix
2836 -  array - the array in column major order
2837 
2838    Note:
2839    You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be
2840    freed when the matrix is destroyed. The array must have been allocated with hipMalloc().
2841 
2842    Level: developer
2843 
2844 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPResetArray()`, `MatDenseHIPReplaceArray()`
2845 @*/
2846 PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
2847 {
2848   PetscFunctionBegin;
2849   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2850   PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2851   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2852   mat->offloadmask = PETSC_OFFLOAD_GPU;
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 /*@C
2857    MatDenseHIPResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseHIPPlaceArray()`
2858 
2859    Not Collective
2860 
2861    Input Parameter:
2862 .  mat - the matrix
2863 
2864    Note:
2865    You can only call this after a call to `MatDenseHIPPlaceArray()`
2866 
2867    Level: developer
2868 
2869 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`
2871 @*/
2872 PetscErrorCode MatDenseHIPResetArray(Mat mat)
2873 {
2874   PetscFunctionBegin;
2875   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2876   PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
2877   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2878   PetscFunctionReturn(0);
2879 }
2880 
2881 /*@C
2882    MatDenseHIPReplaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
2883    array provided by the user. This is useful to avoid copying an array
2884    into a matrix
2885 
2886    Not Collective
2887 
2888    Input Parameters:
2889 +  mat - the matrix
2890 -  array - the array in column major order
2891 
2892    Notes:
2893    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2894    The memory passed in CANNOT be freed by the user. It will be freed
2895    when the matrix is destroyed. The array should respect the matrix leading dimension.
2896 
2897    Level: developer
2898 
2899 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`, `MatDenseHIPResetArray()`
2900 @*/
2901 PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
2902 {
2903   PetscFunctionBegin;
2904   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2905   PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2906   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2907   mat->offloadmask = PETSC_OFFLOAD_GPU;
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 /*@C
2912    MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a `MATDENSEHIP` matrix.
2913 
2914    Not Collective
2915 
2916    Input Parameter:
2917 .  A - the matrix
2918 
2919    Output Parameter:
2920 .  array - the GPU array in column major order
2921 
2922    Notes:
2923    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`. The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed.
2924 
2925    Level: developer
2926 
2927 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArrayRead()`
2928 @*/
2929 PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
2930 {
2931   PetscFunctionBegin;
2932   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2933   PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2934   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 /*@C
2939    MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArrayWrite()`.
2940 
2941    Not Collective
2942 
2943    Input Parameters:
2944 +  A - the matrix
2945 -  array - the GPU array in column major order
2946 
2949    Level: developer
2950 
2951 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
2952 @*/
2953 PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
2954 {
2955   PetscFunctionBegin;
2956   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2957   PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2958   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2959   A->offloadmask = PETSC_OFFLOAD_GPU;
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 /*@C
2964    MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a `MATDENSEHIP` matrix. The array must be restored with `MatDenseHIPRestoreArrayRead()` when no longer needed.
2965 
2966    Not Collective
2967 
2968    Input Parameter:
2969 .  A - the matrix
2970 
2971    Output Parameter:
2972 .  array - the GPU array in column major order
2973 
2974    Note:
2975    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`.
2976 
2977    Level: developer
2978 
2979 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
2980 @*/
2981 PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
2982 {
2983   PetscFunctionBegin;
2984   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2985   PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 /*@C
2990    MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with a call to `MatDenseHIPGetArrayRead()`.
2991 
2992    Not Collective
2993 
2994    Input Parameters:
2995 +  A - the matrix
2996 -  array - the GPU array in column major order
2997 
3001    Level: developer
3002 
3003 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPGetArrayRead()`
3004 @*/
3005 PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
3006 {
3007   PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1); /* match the validation done by the other accessors */
3008   PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 /*@C
3013    MatDenseHIPGetArray - Provides access to the HIP buffer inside a `MATDENSEHIP` matrix. The array must be restored with `MatDenseHIPRestoreArray()` when no longer needed.
3014 
3015    Not Collective
3016 
3017    Input Parameter:
3018 .  A - the matrix
3019 
3020    Output Parameter:
3021 .  array - the GPU array in column major order
3022 
3023    Note:
3024    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`. For read-only access, use `MatDenseHIPGetArrayRead()`.
3025 
3026    Level: developer
3027 
3028 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
3029 @*/
3030 PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
3031 {
3032   PetscFunctionBegin;
3033   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3034   PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
3035   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3036   PetscFunctionReturn(0);
3037 }
3038 
3039 /*@C
3040    MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArray()`.
3041 
3042    Not Collective
3043 
3044    Input Parameters:
3045 +  A - the matrix
3046 -  array - the GPU array in column major order
3047 
3050    Level: developer
3051 
3052 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
3053 @*/
3054 PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
3055 {
3056   PetscFunctionBegin;
3057   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3058   PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
3059   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3060   A->offloadmask = PETSC_OFFLOAD_GPU;
3061   PetscFunctionReturn(0);
3062 }
3063 #endif
3064 
3065 /*@C
3066    MatCreateDense - Creates a matrix in `MATDENSE` format.
3067 
3068    Collective
3069 
3070    Input Parameters:
3071 +  comm - MPI communicator
3072 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3073 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3074 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
3075 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
3076 -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
3077    to control all matrix memory allocation.
3078 
3079    Output Parameter:
3080 .  A - the matrix
3081 
3082    Notes:
3083    The dense format is fully compatible with standard Fortran 77
3084    storage by columns.
3085 
3086    Although local portions of the matrix are stored in column-major
3087    order, the matrix is partitioned across MPI ranks by row.
3088 
3089    The data input variable is intended primarily for Fortran programmers
3090    who wish to allocate their own matrix memory space.  Most users should
3091    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
3092 
3093    The user MUST specify either the local or global matrix dimensions
3094    (possibly both).
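
   Example usage, a minimal sketch with illustrative sizes:
.vb
   Mat A;
   PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
   /* ... set values and assemble A ... */
   PetscCall(MatDestroy(&A));
.ve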
3095 
3096    Level: intermediate
3097 
3098 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
3099 @*/
3100 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3101 {
3102   PetscFunctionBegin;
3103   PetscCall(MatCreate(comm, A));
3104   PetscCall(MatSetSizes(*A, m, n, M, N));
3105   PetscCall(MatSetType(*A, MATDENSE));
3106   PetscCall(MatSeqDenseSetPreallocation(*A, data));
3107   PetscCall(MatMPIDenseSetPreallocation(*A, data));
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 #if defined(PETSC_HAVE_CUDA)
3112 /*@C
3113    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
3114 
3115    Collective
3116 
3117    Input Parameters:
3118 +  comm - MPI communicator
3119 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3120 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3121 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
3122 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
3123 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
3124    to control matrix memory allocation.
3125 
3126    Output Parameter:
3127 .  A - the matrix
3128 
3129    Level: intermediate
3130 
3131 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
3132 @*/
3133 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3134 {
3135   PetscFunctionBegin;
3136   PetscCall(MatCreate(comm, A));
3137   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3138   PetscCall(MatSetSizes(*A, m, n, M, N));
3139   PetscCall(MatSetType(*A, MATDENSECUDA));
3140   PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
3141   PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
3142   PetscFunctionReturn(0);
3143 }
3144 #endif
3145 
3146 #if defined(PETSC_HAVE_HIP)
3147 /*@C
3148    MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.
3149 
3150    Collective
3151 
3152    Input Parameters:
3153 +  comm - MPI communicator
3154 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3155 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3156 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
3157 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
3158 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
3159    to control matrix memory allocation.
3160 
3161    Output Parameter:
3162 .  A - the matrix
3163 
3164    Level: intermediate
3165 
3166 .seealso: `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
3167 @*/
3168 PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3169 {
3170   PetscFunctionBegin;
3171   PetscCall(MatCreate(comm, A));
3172   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3173   PetscCall(MatSetSizes(*A, m, n, M, N));
3174   PetscCall(MatSetType(*A, MATDENSEHIP));
3175   PetscCall(MatSeqDenseHIPSetPreallocation(*A, data));
3176   PetscCall(MatMPIDenseHIPSetPreallocation(*A, data));
3177   PetscFunctionReturn(0);
3178 }
3179 #endif
3180 
3181 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
3182 {
3183   Mat           mat;
3184   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
3185 
3186   PetscFunctionBegin;
3187   *newmat = NULL;
3188   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
3189   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3190   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
3191   a = (Mat_MPIDense *)mat->data;
3192 
3193   mat->factortype   = A->factortype;
3194   mat->assembled    = PETSC_TRUE;
3195   mat->preallocated = PETSC_TRUE;
3196 
3197   mat->insertmode = NOT_SET_VALUES;
3198   a->donotstash   = oldmat->donotstash;
3199 
3200   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
3201   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
3202 
3203   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3204 
3205   *newmat = mat;
3206   PetscFunctionReturn(0);
3207 }
3208 
3209 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
3210 {
3211   PetscBool isbinary;
3212 #if defined(PETSC_HAVE_HDF5)
3213   PetscBool ishdf5;
3214 #endif
3215 
3216   PetscFunctionBegin;
3217   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3218   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3219   /* force binary viewer to load .info file if it has not yet done so */
3220   PetscCall(PetscViewerSetUp(viewer));
3221   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3222 #if defined(PETSC_HAVE_HDF5)
3223   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3224 #endif
3225   if (isbinary) {
3226     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
3227 #if defined(PETSC_HAVE_HDF5)
3228   } else if (ishdf5) {
3229     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
3230 #endif
3231   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
3236 {
3237   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
3238   Mat           a, b;
3239 
3240   PetscFunctionBegin;
3241   a = matA->A;
3242   b = matB->A;
3243   PetscCall(MatEqual(a, b, flag));
3244   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
3249 {
3250   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
3251 
3252   PetscFunctionBegin;
3253   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
3254   PetscCall(MatDestroy(&atb->atb));
3255   PetscCall(PetscFree(atb));
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
3260 {
3261   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
3262 
3263   PetscFunctionBegin;
3264   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
3265   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
3266   PetscCall(PetscFree(abt));
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
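/* C = A^T * B is computed in two steps: each rank forms the full-sized local
   product aseq^T * bseq, packs it by destination rank, and MPI_Reduce_scatter then
   simultaneously sums the per-rank contributions and delivers to each rank exactly
   the rows of C that it owns. */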
3271 {
3272   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3273   Mat_TransMatMultDense *atb;
3274   MPI_Comm               comm;
3275   PetscMPIInt            size, *recvcounts;
3276   PetscScalar           *carray, *sendbuf;
3277   const PetscScalar     *atbarray;
3278   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
3279   const PetscInt        *ranges;
3280 
3281   PetscFunctionBegin;
3282   MatCheckProduct(C, 3);
3283   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3284   atb        = (Mat_TransMatMultDense *)C->product->data;
3285   recvcounts = atb->recvcounts;
3286   sendbuf    = atb->sendbuf;
3287 
3288   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3289   PetscCallMPI(MPI_Comm_size(comm, &size));
3290 
3291   /* compute atbarray = aseq^T * bseq */
3292   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
3293 
3294   PetscCall(MatGetOwnershipRanges(C, &ranges));
3295 
3296   /* arrange atbarray into sendbuf */
3297   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
3298   PetscCall(MatDenseGetLDA(atb->atb, &lda));
3299   for (proc = 0, k = 0; proc < size; proc++) {
3300     for (j = 0; j < cN; j++) {
3301       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
3302     }
3303   }
3304   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
3305 
3306   /* sum all atbarray to local values of C */
3307   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
3308   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
3309   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
3310   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3311   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3316 {
3317   MPI_Comm               comm;
3318   PetscMPIInt            size;
3319   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
3320   Mat_TransMatMultDense *atb;
3321   PetscBool              cisdense = PETSC_FALSE;
3322   PetscInt               i;
3323   const PetscInt        *ranges;
3324 
3325   PetscFunctionBegin;
3326   MatCheckProduct(C, 4);
3327   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3328   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3329   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
3330     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3331   }
3332 
3333   /* create matrix product C */
3334   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
3335   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
3341   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
3342   PetscCall(MatSetUp(C));
3343 
3344   /* create data structure for reuse C */
3345   PetscCallMPI(MPI_Comm_size(comm, &size));
3346   PetscCall(PetscNew(&atb));
3347   cM = C->rmap->N;
3348   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
3349   PetscCall(MatGetOwnershipRanges(C, &ranges));
3350   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
3351 
3352   C->product->data    = atb;
3353   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3358 {
3359   MPI_Comm               comm;
3360   PetscMPIInt            i, size;
3361   PetscInt               maxRows, bufsiz;
3362   PetscMPIInt            tag;
3363   PetscInt               alg;
3364   Mat_MatTransMultDense *abt;
3365   Mat_Product           *product = C->product;
3366   PetscBool              flg;
3367 
3368   PetscFunctionBegin;
3369   MatCheckProduct(C, 4);
3370   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3371   /* check local size of A and B */
3372   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
3373 
3374   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
3375   alg = flg ? 0 : 1;
3376 
3377   /* setup matrix product C */
3378   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
3379   PetscCall(MatSetType(C, MATMPIDENSE));
3380   PetscCall(MatSetUp(C));
3381   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
3382 
3383   /* create data structure for reuse C */
3384   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3385   PetscCallMPI(MPI_Comm_size(comm, &size));
3386   PetscCall(PetscNew(&abt));
3387   abt->tag = tag;
3388   abt->alg = alg;
3389   switch (alg) {
3390   case 1: /* alg: "cyclic" */
3391     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
3392     bufsiz = A->cmap->N * maxRows;
3393     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
3394     break;
3395   default: /* alg: "allgatherv" */
3396     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
3397     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
3398     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
3399     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
3400     break;
3401   }
3402 
3403   C->product->data                = abt;
3404   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
3405   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
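/* Cyclic algorithm for C = A * B^T: the local block of B travels around a ring of
   ranks; at each of the "size" steps a rank multiplies its local block of A against
   the block currently held (a GEMM with the resident block transposed) and writes
   the result into the column slice of its local C owned by that block's origin,
   overlapping the next send/receive with the computation. */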
3410 {
3411   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3412   Mat_MatTransMultDense *abt;
3413   MPI_Comm               comm;
3414   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
3415   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
3416   PetscInt               i, cK             = A->cmap->N, k, j, bn;
3417   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3418   const PetscScalar     *av, *bv;
3419   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
3420   MPI_Request            reqs[2];
3421   const PetscInt        *ranges;
3422 
3423   PetscFunctionBegin;
3424   MatCheckProduct(C, 3);
3425   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3426   abt = (Mat_MatTransMultDense *)C->product->data;
3427   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3428   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3429   PetscCallMPI(MPI_Comm_size(comm, &size));
3430   PetscCall(MatDenseGetArrayRead(a->A, &av));
3431   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3432   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3433   PetscCall(MatDenseGetLDA(a->A, &i));
3434   PetscCall(PetscBLASIntCast(i, &alda));
3435   PetscCall(MatDenseGetLDA(b->A, &i));
3436   PetscCall(PetscBLASIntCast(i, &blda));
3437   PetscCall(MatDenseGetLDA(c->A, &i));
3438   PetscCall(PetscBLASIntCast(i, &clda));
3439   PetscCall(MatGetOwnershipRanges(B, &ranges));
3440   bn = B->rmap->n;
3441   if (blda == bn) {
3442     sendbuf = (PetscScalar *)bv;
3443   } else {
3444     sendbuf = abt->buf[0];
3445     for (k = 0, i = 0; i < cK; i++) {
3446       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
3447     }
3448   }
3449   if (size > 1) {
3450     sendto   = (rank + size - 1) % size;
3451     recvfrom = (rank + size + 1) % size;
3452   } else {
3453     sendto = recvfrom = 0;
3454   }
3455   PetscCall(PetscBLASIntCast(cK, &ck));
3456   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3457   recvisfrom = rank;
3458   for (i = 0; i < size; i++) {
3459     /* we have finished receiving and sending, bufs can be read/modified */
3460     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
3461     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
3462 
3463     if (nextrecvisfrom != rank) {
3464       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
3465       sendsiz = cK * bn;
3466       recvsiz = cK * nextbn;
3467       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
3468       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
3469       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
3470     }
3471 
3472     /* local aseq * sendbuf^T */
3473     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
3474     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
3475 
3476     if (nextrecvisfrom != rank) {
3477       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
3478       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
3479     }
3480     bn         = nextbn;
3481     recvisfrom = nextrecvisfrom;
3482     sendbuf    = recvbuf;
3483   }
3484   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3485   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3486   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3487   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3488   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3489   PetscFunctionReturn(0);
3490 }
3491 
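/* Allgatherv algorithm for C = A * B^T: each rank packs the transpose of its local
   block of B, MPI_Allgatherv concatenates the transposed blocks so that every rank
   holds all of B^T, and a single local GEMM with the local block of A then yields
   the local rows of C. */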
3492 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
3493 {
3494   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3495   Mat_MatTransMultDense *abt;
3496   MPI_Comm               comm;
3497   PetscMPIInt            size;
3498   PetscScalar           *cv, *sendbuf, *recvbuf;
3499   const PetscScalar     *av, *bv;
3500   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
3501   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3502   PetscBLASInt           cm, cn, ck, alda, clda;
3503 
3504   PetscFunctionBegin;
3505   MatCheckProduct(C, 3);
3506   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3507   abt = (Mat_MatTransMultDense *)C->product->data;
3508   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3509   PetscCallMPI(MPI_Comm_size(comm, &size));
3510   PetscCall(MatDenseGetArrayRead(a->A, &av));
3511   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3512   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3513   PetscCall(MatDenseGetLDA(a->A, &i));
3514   PetscCall(PetscBLASIntCast(i, &alda));
3515   PetscCall(MatDenseGetLDA(b->A, &blda));
3516   PetscCall(MatDenseGetLDA(c->A, &i));
3517   PetscCall(PetscBLASIntCast(i, &clda));
3518   /* copy transpose of B into buf[0] */
3519   bn      = B->rmap->n;
3520   sendbuf = abt->buf[0];
3521   recvbuf = abt->buf[1];
3522   for (k = 0, j = 0; j < bn; j++) {
3523     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
3524   }
3525   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3526   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
3527   PetscCall(PetscBLASIntCast(cK, &ck));
3528   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3529   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
3530   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
3531   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3533   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3534   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3535   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3540 {
3541   Mat_MatTransMultDense *abt;
3542 
3543   PetscFunctionBegin;
3544   MatCheckProduct(C, 3);
3545   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3546   abt = (Mat_MatTransMultDense *)C->product->data;
3547   switch (abt->alg) {
3548   case 1:
3549     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
3550     break;
3551   default:
3552     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
3553     break;
3554   }
3555   PetscFunctionReturn(0);
3556 }
3557 
3558 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
3559 {
3560   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
3561 
3562   PetscFunctionBegin;
3563   PetscCall(MatDestroy(&ab->Ce));
3564   PetscCall(MatDestroy(&ab->Ae));
3565   PetscCall(MatDestroy(&ab->Be));
3566   PetscCall(PetscFree(ab));
3567   PetscFunctionReturn(0);
3568 }
3569 
3570 #if defined(PETSC_HAVE_ELEMENTAL)
3571 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3572 {
3573   Mat_MatMultDense *ab;
3574 
3575   PetscFunctionBegin;
3576   MatCheckProduct(C, 3);
3577   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
3578   ab = (Mat_MatMultDense *)C->product->data;
3579   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
3580   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
3581   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
3582   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
3583   PetscFunctionReturn(0);
3584 }
3585 
3586 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3587 {
3588   Mat               Ae, Be, Ce;
3589   Mat_MatMultDense *ab;
3590 
3591   PetscFunctionBegin;
3592   MatCheckProduct(C, 4);
3593   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3594   /* check local size of A and B */
3595   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
3596     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
3597   }
3598 
3599   /* create elemental matrices Ae and Be */
3600   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
3601   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
3602   PetscCall(MatSetType(Ae, MATELEMENTAL));
3603   PetscCall(MatSetUp(Ae));
3604   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
3605 
3606   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
3607   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
3608   PetscCall(MatSetType(Be, MATELEMENTAL));
3609   PetscCall(MatSetUp(Be));
3610   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
3611 
3612   /* compute symbolic Ce = Ae*Be */
3613   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
3614   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
3615 
3616   /* setup C */
3617   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3618   PetscCall(MatSetType(C, MATDENSE));
3619   PetscCall(MatSetUp(C));
3620 
3621   /* create data structure for reuse Cdense */
3622   PetscCall(PetscNew(&ab));
3623   ab->Ae = Ae;
3624   ab->Be = Be;
3625   ab->Ce = Ce;
3626 
3627   C->product->data       = ab;
3628   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
3629   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3630   PetscFunctionReturn(0);
3631 }
3632 #endif
3633 /* ----------------------------------------------- */
3634 #if defined(PETSC_HAVE_ELEMENTAL)
3635 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3636 {
3637   PetscFunctionBegin;
3638   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3639   C->ops->productsymbolic = MatProductSymbolic_AB;
3640   PetscFunctionReturn(0);
3641 }
3642 #endif
3643 
3644 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3645 {
3646   Mat_Product *product = C->product;
3647   Mat          A = product->A, B = product->B;
3648 
3649   PetscFunctionBegin;
3650   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3651     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3652   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3653   C->ops->productsymbolic          = MatProductSymbolic_AtB;
3654   PetscFunctionReturn(0);
3655 }
3656 
3657 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3658 {
3659   Mat_Product *product     = C->product;
3660   const char  *algTypes[2] = {"allgatherv", "cyclic"};
3661   PetscInt     alg, nalg = 2;
3662   PetscBool    flg = PETSC_FALSE;
3663 
3664   PetscFunctionBegin;
3665   /* Set default algorithm */
3666   alg = 0; /* default is allgatherv */
3667   PetscCall(PetscStrcmp(product->alg, "default", &flg));
3668   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3669 
3670   /* Get runtime option */
3671   if (product->api_user) {
3672     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
3673     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
3674     PetscOptionsEnd();
3675   } else {
3676     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
3677     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
3678     PetscOptionsEnd();
3679   }
3680   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3681 
3682   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3683   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3684   PetscFunctionReturn(0);
3685 }
3686 
3687 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3688 {
3689   Mat_Product *product = C->product;
3690 
3691   PetscFunctionBegin;
3692   switch (product->type) {
3693 #if defined(PETSC_HAVE_ELEMENTAL)
3694   case MATPRODUCT_AB:
3695     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
3696     break;
3697 #endif
3698   case MATPRODUCT_AtB:
3699     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3700     break;
3701   case MATPRODUCT_ABt:
3702     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3703     break;
3704   default:
3705     break;
3706   }
3707   PetscFunctionReturn(0);
3708 }
3709