
/*
   Basic functions for parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the
  sequential matrix that represents the operator; for a sequential matrix this is
  the matrix itself.

  Input Parameter:
. A - the sequential or MPI dense matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
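
/*
   Usage sketch (illustrative only; the local sizes m and n are assumptions and
   error checking is abbreviated). The returned local matrix is owned by A and
   must not be destroyed by the caller:

     Mat A, Aloc;

     PetscCall(MatCreateDense(PETSC_COMM_WORLD, m, n, PETSC_DECIDE, PETSC_DECIDE, NULL, &A));
     PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
     // Aloc is the MATSEQDENSE holding this rank's m x N block (all columns)
*/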

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if defined(PETSC_HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif defined(PETSC_HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
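
/*
   Usage sketch (illustrative only; assumes an assembled square MATMPIDENSE
   matrix A). The returned block is composed with A above and must not be
   destroyed by the caller:

     Mat Adiag;

     PetscCall(MatGetDiagonalBlock(A, &Adiag));
     // Adiag is the m x m sequential block of this rank's rows and columns
*/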

PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
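
/*
   Usage sketch (illustrative only; A, its global row count M, and the chosen
   indices are assumptions). Values destined for rows owned by other ranks are
   stashed above, so every insertion phase must end with an assembly pair:

     PetscInt rstart, rend;

     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(MatSetValue(A, rstart, 0, 1.0, INSERT_VALUES));   // locally owned row
     PetscCall(MatSetValue(A, rend % M, 0, 2.0, INSERT_VALUES)); // possibly off-process row
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/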

PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if defined(PETSC_HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif defined(PETSC_HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! Each index set should really
     be checked to confirm that it is valid. Currently the submatrix must have the
     same row partitioning as the original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
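
/*
   Usage sketch (illustrative only; ncols and first_col are assumptions).
   Because of the restriction noted above, isrow selects exactly the caller's
   locally owned rows so the submatrix keeps the original row partitioning:

     IS       isrow, iscol;
     Mat      S;
     PetscInt rstart, rend;

     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, ncols, first_col, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &S));
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscCall(MatDestroy(&S));
*/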

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
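
/*
   Usage sketch (illustrative only; nrows, rows, and the vectors x and b are
   assumptions). Zero the given global rows, place 1.0 on their diagonal
   entries, and fix b so those rows of a linear system keep the values in x:

     PetscCall(MatZeroRows(A, nrows, rows, 1.0, x, b));
*/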

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}
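
/*
   Usage sketch (illustrative only). MatCreateVecs() returns vectors whose
   layouts match the column and row distributions that the scatter above
   expects:

     Vec x, y;

     PetscCall(MatCreateVecs(mat, &x, &y));
     PetscCall(VecSet(x, 1.0));
     PetscCall(MatMult(mat, x, y)); // y = mat * x
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
*/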

PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, m = A->rmap->n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}
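
/*
   Usage sketch (illustrative only). ll must conform to the row layout and rr
   to the column layout; either may be NULL to skip that side of the scaling:

     Vec ll, rr;

     PetscCall(MatCreateVecs(A, &rr, &ll));
     PetscCall(VecSet(ll, 2.0));
     PetscCall(VecSet(rr, 0.5));
     PetscCall(MatDiagonalScale(A, ll, rr)); // A <- diag(ll) * A * diag(rr)
     PetscCall(VecDestroy(&ll));
     PetscCall(VecDestroy(&rr));
*/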

PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
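
/*
   Usage sketch (illustrative only). MAT_INITIAL_MATRIX creates the transpose;
   a later call with MAT_REUSE_MATRIX refills it after the entries of A change:

     Mat At;

     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
     // ... modify entries of A and reassemble ...
     PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &At));
     PetscCall(MatDestroy(&At));
*/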

static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
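
/*
   Usage sketch (illustrative only; N is the global column count, an
   assumption). A public entry point for these reductions is, e.g.,
   MatGetColumnNorms():

     PetscReal *norms;

     PetscCall(PetscMalloc1(N, &norms));
     PetscCall(MatGetColumnNorms(A, NORM_2, norms));
     PetscCall(PetscFree(norms));
*/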

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}
1285 
1286 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1287 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1288 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1289 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1290 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1291 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1292 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1293 
1294 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
1295 {
1296   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1297 
1298   PetscFunctionBegin;
1299   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1300   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1301   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1302   mat->boundtocpu = bind;
1303   if (!bind) {
1304     PetscBool iscuda;
1305 
1306     PetscCall(PetscFree(mat->defaultrandtype));
1307     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1308     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
1309     if (!iscuda) PetscCall(VecDestroy(&d->cvec));
1310     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
1311     if (!iscuda) PetscCall(MatDestroy(&d->cmat));
1312     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
1313     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
1314     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
1315     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1316     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
1317     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1318     mat->ops->shift = MatShift_MPIDenseCUDA;
1319   } else {
1320     PetscCall(PetscFree(mat->defaultrandtype));
1321     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1322     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1323     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1324     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1325     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1326     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1327     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1328     mat->ops->shift = MatShift_MPIDense;
1329   }
1330   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1331   PetscFunctionReturn(0);
1332 }
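
/*
   Usage sketch (illustrative; A is a placeholder MATMPIDENSECUDA matrix):
   MatBindToCPU() chooses where subsequent operations on a GPU matrix execute.

     PetscCall(MatBindToCPU(A, PETSC_TRUE));  // later MatMult() etc. run on the host
     PetscCall(MatBindToCPU(A, PETSC_FALSE)); // hand execution back to the device
*/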
1333 
1334 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1335 {
1336   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1337   PetscBool     iscuda;
1338 
1339   PetscFunctionBegin;
1340   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1341   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1342   if (!iscuda) PetscFunctionReturn(0);
1343   PetscCall(PetscLayoutSetUp(A->rmap));
1344   PetscCall(PetscLayoutSetUp(A->cmap));
1345   if (!d->A) {
1346     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1347     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1348   }
1349   PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
1350   PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1351   A->preallocated = PETSC_TRUE;
1352   A->assembled    = PETSC_TRUE;
1353   PetscFunctionReturn(0);
1354 }
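
/*
   Usage sketch (names are placeholders; assumes a cudaMalloc'ed, column-major
   buffer holding local-rows x global-columns entries):

     Mat          A;
     PetscScalar *d_data;
     PetscInt     m = 4, N = 8; // local rows, global columns
     PetscCallCUDA(cudaMalloc((void **)&d_data, m * N * sizeof(PetscScalar)));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, m, PETSC_DECIDE, PETSC_DETERMINE, N));
     PetscCall(MatSetType(A, MATMPIDENSECUDA));
     PetscCall(MatMPIDenseCUDASetPreallocation(A, d_data)); // pass NULL to let PETSc allocate
*/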
1355 #endif
1356 
1357 #if defined(PETSC_HAVE_HIP)
1358 PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha)
1359 {
1360   PetscScalar *da;
1361   PetscInt     lda;
1362 
1363   PetscFunctionBegin;
1364   PetscCall(MatDenseHIPGetArray(A, &da));
1365   PetscCall(MatDenseGetLDA(A, &lda));
1366   PetscCall(PetscInfo(A, "Performing MatShift() on the device backend\n"));
1367   PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1368   PetscCall(MatDenseHIPRestoreArray(A, &da));
1369   PetscFunctionReturn(0);
1370 }
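
/*
   Usage sketch: this kernel is reached through the generic MatShift(), which
   adds alpha to every diagonal entry (A is a placeholder matrix).

     PetscCall(MatShift(A, 1.0)); // A <- A + 1.0 * I
*/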
1371 
1372 static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1373 {
1374   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1375   PetscInt      lda;
1376 
1377   PetscFunctionBegin;
1378   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1379   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1380   if (!a->cvec) {
1381     PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1382     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1383   }
1384   a->vecinuse = col + 1;
1385   PetscCall(MatDenseGetLDA(a->A, &lda));
1386   PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1387   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1388   *v = a->cvec;
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1393 {
1394   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1395 
1396   PetscFunctionBegin;
1397   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1398   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1399   a->vecinuse = 0;
1400   PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1401   PetscCall(VecHIPResetArray(a->cvec));
1402   if (v) *v = NULL;
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1407 {
1408   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1409   PetscInt      lda;
1410 
1411   PetscFunctionBegin;
1412   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1413   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1414   if (!a->cvec) {
1415     PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1416     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1417   }
1418   a->vecinuse = col + 1;
1419   PetscCall(MatDenseGetLDA(a->A, &lda));
1420   PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse));
1421   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1422   PetscCall(VecLockReadPush(a->cvec));
1423   *v = a->cvec;
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1428 {
1429   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1430 
1431   PetscFunctionBegin;
1432   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1433   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1434   a->vecinuse = 0;
1435   PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse));
1436   PetscCall(VecLockReadPop(a->cvec));
1437   PetscCall(VecHIPResetArray(a->cvec));
1438   if (v) *v = NULL;
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1443 {
1444   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1445   PetscInt      lda;
1446 
1447   PetscFunctionBegin;
1448   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1449   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1450   if (!a->cvec) {
1451     PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1452     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
1453   }
1454   a->vecinuse = col + 1;
1455   PetscCall(MatDenseGetLDA(a->A, &lda));
1456   PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1457   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1458   *v = a->cvec;
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1463 {
1464   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1465 
1466   PetscFunctionBegin;
1467   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1468   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1469   a->vecinuse = 0;
1470   PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1471   PetscCall(VecHIPResetArray(a->cvec));
1472   if (v) *v = NULL;
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1477 {
1478   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1479 
1480   PetscFunctionBegin;
1481   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1482   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1483   PetscCall(MatDenseHIPPlaceArray(l->A, a));
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A)
1488 {
1489   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1490 
1491   PetscFunctionBegin;
1492   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1493   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1494   PetscCall(MatDenseHIPResetArray(l->A));
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1499 {
1500   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1501 
1502   PetscFunctionBegin;
1503   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1504   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1505   PetscCall(MatDenseHIPReplaceArray(l->A, a));
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1510 {
1511   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1512 
1513   PetscFunctionBegin;
1514   PetscCall(MatDenseHIPGetArrayWrite(l->A, a));
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1519 {
1520   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1521 
1522   PetscFunctionBegin;
1523   PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a));
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1528 {
1529   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1530 
1531   PetscFunctionBegin;
1532   PetscCall(MatDenseHIPGetArrayRead(l->A, a));
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1537 {
1538   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1539 
1540   PetscFunctionBegin;
1541   PetscCall(MatDenseHIPRestoreArrayRead(l->A, a));
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a)
1546 {
1547   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1548 
1549   PetscFunctionBegin;
1550   PetscCall(MatDenseHIPGetArray(l->A, a));
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a)
1555 {
1556   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1557 
1558   PetscFunctionBegin;
1559   PetscCall(MatDenseHIPRestoreArray(l->A, a));
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1564 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1565 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1566 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1567 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1568 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1569 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1570 
1571 static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind)
1572 {
1573   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1574 
1575   PetscFunctionBegin;
1576   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1577   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1578   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1579   mat->boundtocpu = bind;
1580   if (!bind) {
1581     PetscBool iship;
1582 
1583     PetscCall(PetscFree(mat->defaultrandtype));
1584     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1585     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iship));
1586     if (!iship) PetscCall(VecDestroy(&d->cvec));
1587     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iship));
1588     if (!iship) PetscCall(MatDestroy(&d->cmat));
1589     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP));
1590     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP));
1591     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP));
1592     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP));
1593     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP));
1594     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP));
1595     mat->ops->shift = MatShift_MPIDenseHIP;
1596   } else {
1597     PetscCall(PetscFree(mat->defaultrandtype));
1598     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1599     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1600     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1601     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1602     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1603     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1604     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1605     mat->ops->shift = MatShift_MPIDense;
1606   }
1607   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data)
1612 {
1613   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1614   PetscBool     iship;
1615 
1616   PetscFunctionBegin;
1617   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1618   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
1619   if (!iship) PetscFunctionReturn(0);
1620   PetscCall(PetscLayoutSetUp(A->rmap));
1621   PetscCall(PetscLayoutSetUp(A->cmap));
1622   if (!d->A) {
1623     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1624     PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
1625     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1626   }
1627   PetscCall(MatSetType(d->A, MATSEQDENSEHIP));
1628   PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data));
1629   A->preallocated = PETSC_TRUE;
1630   A->assembled    = PETSC_TRUE;
1631   PetscFunctionReturn(0);
1632 }
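
/*
   Usage sketch (mirrors the CUDA case above; A is a placeholder): passing
   NULL for the device buffer lets PETSc allocate the storage itself.

     PetscCall(MatSetType(A, MATMPIDENSEHIP));
     PetscCall(MatMPIDenseHIPSetPreallocation(A, NULL));
*/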
1633 #endif
1634 
1635 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1636 {
1637   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1638 
1639   PetscFunctionBegin;
1640   PetscCall(MatSetRandom(d->A, rctx));
1641 #if defined(PETSC_HAVE_DEVICE)
1642   x->offloadmask = d->A->offloadmask;
1643 #endif
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1648 {
1649   PetscFunctionBegin;
1650   *missing = PETSC_FALSE;
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1655 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1656 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1657 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1658 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1659 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1660 
1661 /* -------------------------------------------------------------------*/
1662 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1663                                        MatGetRow_MPIDense,
1664                                        MatRestoreRow_MPIDense,
1665                                        MatMult_MPIDense,
1666                                        /*  4*/ MatMultAdd_MPIDense,
1667                                        MatMultTranspose_MPIDense,
1668                                        MatMultTransposeAdd_MPIDense,
1669                                        NULL,
1670                                        NULL,
1671                                        NULL,
1672                                        /* 10*/ NULL,
1673                                        NULL,
1674                                        NULL,
1675                                        NULL,
1676                                        MatTranspose_MPIDense,
1677                                        /* 15*/ MatGetInfo_MPIDense,
1678                                        MatEqual_MPIDense,
1679                                        MatGetDiagonal_MPIDense,
1680                                        MatDiagonalScale_MPIDense,
1681                                        MatNorm_MPIDense,
1682                                        /* 20*/ MatAssemblyBegin_MPIDense,
1683                                        MatAssemblyEnd_MPIDense,
1684                                        MatSetOption_MPIDense,
1685                                        MatZeroEntries_MPIDense,
1686                                        /* 24*/ MatZeroRows_MPIDense,
1687                                        NULL,
1688                                        NULL,
1689                                        NULL,
1690                                        NULL,
1691                                        /* 29*/ MatSetUp_MPIDense,
1692                                        NULL,
1693                                        NULL,
1694                                        MatGetDiagonalBlock_MPIDense,
1695                                        NULL,
1696                                        /* 34*/ MatDuplicate_MPIDense,
1697                                        NULL,
1698                                        NULL,
1699                                        NULL,
1700                                        NULL,
1701                                        /* 39*/ MatAXPY_MPIDense,
1702                                        MatCreateSubMatrices_MPIDense,
1703                                        NULL,
1704                                        MatGetValues_MPIDense,
1705                                        MatCopy_MPIDense,
1706                                        /* 44*/ NULL,
1707                                        MatScale_MPIDense,
1708                                        MatShift_MPIDense,
1709                                        NULL,
1710                                        NULL,
1711                                        /* 49*/ MatSetRandom_MPIDense,
1712                                        NULL,
1713                                        NULL,
1714                                        NULL,
1715                                        NULL,
1716                                        /* 54*/ NULL,
1717                                        NULL,
1718                                        NULL,
1719                                        NULL,
1720                                        NULL,
1721                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1722                                        MatDestroy_MPIDense,
1723                                        MatView_MPIDense,
1724                                        NULL,
1725                                        NULL,
1726                                        /* 64*/ NULL,
1727                                        NULL,
1728                                        NULL,
1729                                        NULL,
1730                                        NULL,
1731                                        /* 69*/ NULL,
1732                                        NULL,
1733                                        NULL,
1734                                        NULL,
1735                                        NULL,
1736                                        /* 74*/ NULL,
1737                                        NULL,
1738                                        NULL,
1739                                        NULL,
1740                                        NULL,
1741                                        /* 79*/ NULL,
1742                                        NULL,
1743                                        NULL,
1744                                        NULL,
1745                                        /* 83*/ MatLoad_MPIDense,
1746                                        NULL,
1747                                        NULL,
1748                                        NULL,
1749                                        NULL,
1750                                        NULL,
1751                                        /* 89*/ NULL,
1752                                        NULL,
1753                                        NULL,
1754                                        NULL,
1755                                        NULL,
1756                                        /* 94*/ NULL,
1757                                        NULL,
1758                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1759                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1760                                        NULL,
1761                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1762                                        NULL,
1763                                        NULL,
1764                                        MatConjugate_MPIDense,
1765                                        NULL,
1766                                        /*104*/ NULL,
1767                                        MatRealPart_MPIDense,
1768                                        MatImaginaryPart_MPIDense,
1769                                        NULL,
1770                                        NULL,
1771                                        /*109*/ NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        MatGetColumnVector_MPIDense,
1775                                        MatMissingDiagonal_MPIDense,
1776                                        /*114*/ NULL,
1777                                        NULL,
1778                                        NULL,
1779                                        NULL,
1780                                        NULL,
1781                                        /*119*/ NULL,
1782                                        NULL,
1783                                        NULL,
1784                                        NULL,
1785                                        NULL,
1786                                        /*124*/ NULL,
1787                                        MatGetColumnReductions_MPIDense,
1788                                        NULL,
1789                                        NULL,
1790                                        NULL,
1791                                        /*129*/ NULL,
1792                                        NULL,
1793                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1794                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1795                                        NULL,
1796                                        /*134*/ NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                        NULL,
1801                                        /*139*/ NULL,
1802                                        NULL,
1803                                        NULL,
1804                                        NULL,
1805                                        NULL,
1806                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1807                                        /*145*/ NULL,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        /*150*/ NULL,
1813                                        NULL};
1814 
1815 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1816 {
1817   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1818   MatType       mtype = MATSEQDENSE;
1819 
1820   PetscFunctionBegin;
1821   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1822   PetscCall(PetscLayoutSetUp(mat->rmap));
1823   PetscCall(PetscLayoutSetUp(mat->cmap));
1824   if (!a->A) {
1825     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1826     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1827   }
1828 #if defined(PETSC_HAVE_CUDA)
1829   PetscBool iscuda;
1830   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1831   if (iscuda) mtype = MATSEQDENSECUDA;
1832 #endif
1833 #if defined(PETSC_HAVE_HIP)
1834   PetscBool iship;
1835   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1836   if (iship) mtype = MATSEQDENSEHIP;
1837 #endif
1838   PetscCall(MatSetType(a->A, mtype));
1839   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1840 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1841   mat->offloadmask = a->A->offloadmask;
1842 #endif
1843   mat->preallocated = PETSC_TRUE;
1844   mat->assembled    = PETSC_TRUE;
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1849 {
1850   Mat B, C;
1851 
1852   PetscFunctionBegin;
1853   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1854   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1855   PetscCall(MatDestroy(&C));
1856   if (reuse == MAT_REUSE_MATRIX) {
1857     C = *newmat;
1858   } else C = NULL;
1859   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1860   PetscCall(MatDestroy(&B));
1861   if (reuse == MAT_INPLACE_MATRIX) {
1862     PetscCall(MatHeaderReplace(A, &C));
1863   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1868 {
1869   Mat B, C;
1870 
1871   PetscFunctionBegin;
1872   PetscCall(MatDenseGetLocalMatrix(A, &C));
1873   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1874   if (reuse == MAT_REUSE_MATRIX) {
1875     C = *newmat;
1876   } else C = NULL;
1877   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1878   PetscCall(MatDestroy(&B));
1879   if (reuse == MAT_INPLACE_MATRIX) {
1880     PetscCall(MatHeaderReplace(A, &C));
1881   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1882   PetscFunctionReturn(0);
1883 }
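
/*
   Usage sketch: both converters above are normally reached through the public
   MatConvert() interface (A is a placeholder MATMPIDENSE matrix).

     Mat B;
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));   // dense -> sparse
     PetscCall(MatConvert(B, MATMPIDENSE, MAT_INPLACE_MATRIX, &B)); // sparse -> dense, in place
*/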
1884 
1885 #if defined(PETSC_HAVE_ELEMENTAL)
1886 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1887 {
1888   Mat          mat_elemental;
1889   PetscScalar *v;
1890   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1891 
1892   PetscFunctionBegin;
1893   if (reuse == MAT_REUSE_MATRIX) {
1894     mat_elemental = *newmat;
1895     PetscCall(MatZeroEntries(*newmat));
1896   } else {
1897     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1898     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1899     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1900     PetscCall(MatSetUp(mat_elemental));
1901     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1902   }
1903 
1904   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1905   for (i = 0; i < N; i++) cols[i] = i;
1906   for (i = 0; i < m; i++) rows[i] = rstart + i;
1907 
1908   /* The PETSc-Elemental interface uses AXPY-style updates for off-process entries, so only ADD_VALUES is allowed */
1909   PetscCall(MatDenseGetArray(A, &v));
1910   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1911   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1912   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1913   PetscCall(MatDenseRestoreArray(A, &v));
1914   PetscCall(PetscFree2(rows, cols));
1915 
1916   if (reuse == MAT_INPLACE_MATRIX) {
1917     PetscCall(MatHeaderReplace(A, &mat_elemental));
1918   } else {
1919     *newmat = mat_elemental;
1920   }
1921   PetscFunctionReturn(0);
1922 }
1923 #endif
1924 
1925 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1926 {
1927   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1928 
1929   PetscFunctionBegin;
1930   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1935 {
1936   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1937 
1938   PetscFunctionBegin;
1939   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1944 {
1945   Mat_MPIDense *mat;
1946   PetscInt      m, nloc, N;
1947 
1948   PetscFunctionBegin;
1949   PetscCall(MatGetSize(inmat, &m, &N));
1950   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1951   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1952     PetscInt sum;
1953 
1954     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1955     /* Check sum(n) = N */
1956     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1957     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1958 
1959     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1960     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1961   }
1962 
1963   /* numeric phase */
1964   mat = (Mat_MPIDense *)(*outmat)->data;
1965   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1966   PetscFunctionReturn(0);
1967 }
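
/*
   Usage sketch: each rank contributes a sequential dense block with the same
   global column count; the blocks are stacked by rows (mloc, N are placeholders).

     Mat seq, mpi;
     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mloc, N, NULL, &seq));
     // ... fill seq with MatSetValues() ...
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seq, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpi));
*/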
1968 
1969 #if defined(PETSC_HAVE_CUDA)
1970 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1971 {
1972   Mat           B;
1973   Mat_MPIDense *m;
1974 
1975   PetscFunctionBegin;
1976   if (reuse == MAT_INITIAL_MATRIX) {
1977     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1978   } else if (reuse == MAT_REUSE_MATRIX) {
1979     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1980   }
1981 
1982   B = *newmat;
1983   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
1984   PetscCall(PetscFree(B->defaultvectype));
1985   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
1986   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
1987   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
1988   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
1989   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
1990   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
1991   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
1992   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
1993   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
1994   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
1995   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
1996   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
1997   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
1998   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
1999   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
2000   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
2001   m = (Mat_MPIDense *)B->data;
2002   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2003   B->ops->bindtocpu = NULL;
2004   B->offloadmask    = PETSC_OFFLOAD_CPU;
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2009 {
2010   Mat           B;
2011   Mat_MPIDense *m;
2012 
2013   PetscFunctionBegin;
2014   if (reuse == MAT_INITIAL_MATRIX) {
2015     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2016   } else if (reuse == MAT_REUSE_MATRIX) {
2017     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2018   }
2019 
2020   B = *newmat;
2021   PetscCall(PetscFree(B->defaultvectype));
2022   PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
2023   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
2024   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
2025   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2026   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2027   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2028   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2029   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
2030   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
2031   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
2032   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
2033   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
2034   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
2035   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
2036   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
2037   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
2038   m = (Mat_MPIDense *)B->data;
2039   if (m->A) {
2040     PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
2041     B->offloadmask = PETSC_OFFLOAD_BOTH;
2042   } else {
2043     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2044   }
2045   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));
2046 
2047   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
2048   PetscFunctionReturn(0);
2049 }
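
/*
   Usage sketch: these conversions are normally triggered through MatConvert()
   (A is a placeholder host-side MATMPIDENSE matrix).

     Mat Agpu;
     PetscCall(MatConvert(A, MATMPIDENSECUDA, MAT_INITIAL_MATRIX, &Agpu));
*/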
2050 #endif
2051 
2052 #if defined(PETSC_HAVE_HIP)
2053 PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2054 {
2055   Mat           B;
2056   Mat_MPIDense *m;
2057 
2058   PetscFunctionBegin;
2059   if (reuse == MAT_INITIAL_MATRIX) {
2060     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2061   } else if (reuse == MAT_REUSE_MATRIX) {
2062     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2063   }
2064 
2065   B = *newmat;
2066   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE));
2067   PetscCall(PetscFree(B->defaultvectype));
2068   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
2069   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
2070   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL));
2071   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
2072   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
2073   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
2074   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
2075   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL));
2076   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL));
2077   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL));
2078   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL));
2079   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL));
2080   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL));
2081   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL));
2082   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL));
2083   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL));
2084   m = (Mat_MPIDense *)B->data;
2085   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2086   B->ops->bindtocpu = NULL;
2087   B->offloadmask    = PETSC_OFFLOAD_CPU;
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2092 {
2093   Mat           B;
2094   Mat_MPIDense *m;
2095 
2096   PetscFunctionBegin;
2097   if (reuse == MAT_INITIAL_MATRIX) {
2098     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2099   } else if (reuse == MAT_REUSE_MATRIX) {
2100     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2101   }
2102 
2103   B = *newmat;
2104   PetscCall(PetscFree(B->defaultvectype));
2105   PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype));
2106   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP));
2107   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense));
2108   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2109   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2110   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2111   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2112   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP));
2113   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP));
2114   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP));
2115   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP));
2116   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP));
2117   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP));
2118   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP));
2119   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP));
2120   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP));
2121   m = (Mat_MPIDense *)B->data;
2122   if (m->A) {
2123     PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A));
2124     B->offloadmask = PETSC_OFFLOAD_BOTH;
2125   } else {
2126     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2127   }
2128   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE));
2129 
2130   B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP;
2131   PetscFunctionReturn(0);
2132 }
2133 #endif
2134 
2135 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2136 {
2137   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2138   PetscInt      lda;
2139 
2140   PetscFunctionBegin;
2141   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2142   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2143   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2144   a->vecinuse = col + 1;
2145   PetscCall(MatDenseGetLDA(a->A, &lda));
2146   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
2147   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2148   *v = a->cvec;
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2153 {
2154   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2155 
2156   PetscFunctionBegin;
2157   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2158   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
2159   a->vecinuse = 0;
2160   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
2161   PetscCall(VecResetArray(a->cvec));
2162   if (v) *v = NULL;
2163   PetscFunctionReturn(0);
2164 }
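
/*
   Usage sketch: every MatDenseGetColumnVec() must be paired with
   MatDenseRestoreColumnVec() before another column (or the raw array) is
   requested; the read/write variants below follow the same discipline.

     Vec col;
     PetscCall(MatDenseGetColumnVec(A, 0, &col)); // borrow column 0 as a Vec
     PetscCall(VecScale(col, 2.0));               // operate on it in place
     PetscCall(MatDenseRestoreColumnVec(A, 0, &col));
*/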
2165 
2166 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2167 {
2168   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2169   PetscInt      lda;
2170 
2171   PetscFunctionBegin;
2172   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2173   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2174   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2175   a->vecinuse = col + 1;
2176   PetscCall(MatDenseGetLDA(a->A, &lda));
2177   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
2178   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2179   PetscCall(VecLockReadPush(a->cvec));
2180   *v = a->cvec;
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2185 {
2186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2187 
2188   PetscFunctionBegin;
2189   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2190   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2191   a->vecinuse = 0;
2192   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
2193   PetscCall(VecLockReadPop(a->cvec));
2194   PetscCall(VecResetArray(a->cvec));
2195   if (v) *v = NULL;
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2200 {
2201   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2202   PetscInt      lda;
2203 
2204   PetscFunctionBegin;
2205   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2206   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2207   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2208   a->vecinuse = col + 1;
2209   PetscCall(MatDenseGetLDA(a->A, &lda));
2210   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2211   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2212   *v = a->cvec;
2213   PetscFunctionReturn(0);
2214 }
2215 
2216 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2217 {
2218   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2219 
2220   PetscFunctionBegin;
2221   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2222   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2223   a->vecinuse = 0;
2224   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2225   PetscCall(VecResetArray(a->cvec));
2226   if (v) *v = NULL;
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
2231 {
2232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2233   Mat_MPIDense *c;
2234   MPI_Comm      comm;
2235   PetscInt      pbegin, pend;
2236 
2237   PetscFunctionBegin;
2238   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2239   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2240   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2241   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
2242   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
2243   if (!a->cmat) {
2244     PetscCall(MatCreate(comm, &a->cmat));
2245     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
2246     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
2247     else {
2248       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2249       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2250       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2251     }
2252     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2253     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2254   } else {
2255     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
2256     if (same && a->cmat->rmap->N != A->rmap->N) {
2257       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
2258       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2259     }
2260     if (!same) {
2261       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
2262       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
2263       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2264       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2265       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2266     }
2267     if (cend - cbegin != a->cmat->cmap->N) {
2268       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
2269       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
2270       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2271       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2272     }
2273   }
2274   c = (Mat_MPIDense *)a->cmat->data;
2275   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2276   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
2277 
2278   a->cmat->preallocated = PETSC_TRUE;
2279   a->cmat->assembled    = PETSC_TRUE;
2280 #if defined(PETSC_HAVE_DEVICE)
2281   a->cmat->offloadmask = c->A->offloadmask;
2282 #endif
2283   a->matinuse = cbegin + 1;
2284   *v          = a->cmat;
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
2289 {
2290   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2291   Mat_MPIDense *c;
2292 
2293   PetscFunctionBegin;
2294   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
2295   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
2296   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
2297   a->matinuse = 0;
2298   c           = (Mat_MPIDense *)a->cmat->data;
2299   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
2300   if (v) *v = NULL;
2301 #if defined(PETSC_HAVE_DEVICE)
2302   A->offloadmask = a->A->offloadmask;
2303 #endif
2304   PetscFunctionReturn(0);
2305 }
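
/*
   Usage sketch: a contiguous block of rows and columns viewed as a Mat that
   aliases the parent's storage (M is a placeholder global row count).

     Mat sub;
     PetscCall(MatDenseGetSubMatrix(A, 0, M, 1, 3, &sub)); // global rows [0,M), columns [1,3)
     PetscCall(MatScale(sub, 0.5));
     PetscCall(MatDenseRestoreSubMatrix(A, &sub));
*/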
2306 
2307 /*MC
2308    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
2309 
2310    Options Database Keys:
2311 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
2312 
2313   Level: beginner
2314 
2315 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
2316 M*/
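
/*
   Usage sketch: the usual ways to obtain this type (M, N are placeholders).

     Mat A;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
     // or: MatCreate() + MatSetSizes() + MatSetFromOptions() with -mat_type mpidense
*/
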
2317 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
2318 {
2319   Mat_MPIDense *a;
2320 
2321   PetscFunctionBegin;
2322   PetscCall(PetscNew(&a));
2323   mat->data = (void *)a;
2324   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
2325 
2326   mat->insertmode = NOT_SET_VALUES;
2327 
2328   /* build cache for off-process entries formed during assembly */
2329   a->donotstash = PETSC_FALSE;
2330 
2331   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
2332 
2333   /* data structures used for matrix-vector products */
2334   a->lvec        = NULL;
2335   a->Mvctx       = NULL;
2336   a->roworiented = PETSC_TRUE;
2337 
2338   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
2339   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
2340   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
2341   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
2342   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
2343   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
2344   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
2345   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
2346   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
2347   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
2348   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
2349   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
2350   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
2351   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
2352   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
2353   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
2354   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
2355   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
2356   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
2357   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
2358   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
2359 #if defined(PETSC_HAVE_ELEMENTAL)
2360   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
2361 #endif
2362 #if defined(PETSC_HAVE_SCALAPACK)
2363   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
2364 #endif
2365 #if defined(PETSC_HAVE_CUDA)
2366   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
2367 #endif
2368   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
2369   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2370   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2371 #if defined(PETSC_HAVE_CUDA)
2372   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2373   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2374 #endif
2375 #if defined(PETSC_HAVE_HIP)
2376   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
2377   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2378   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2379 #endif
2380   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
2381   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
2382   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 /*MC
2387    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2388 
2389    Options Database Keys:
2390 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
2391 
2392   Level: beginner
2393 
2394 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2395 M*/
2396 #if defined(PETSC_HAVE_CUDA)
2397   #include <petsc/private/deviceimpl.h>
2398 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2399 {
2400   PetscFunctionBegin;
2401   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
2402   PetscCall(MatCreate_MPIDense(B));
2403   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
2404   PetscFunctionReturn(0);
2405 }
2406 #endif
2407 
2408 /*MC
2409    MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs.
2410 
2411    Options Database Keys:
2412 . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()`
2413 
2414   Level: beginner
2415 
2416 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSEHIP`, `MATSEQDENSECUDA`
2417 M*/
2418 #if defined(PETSC_HAVE_HIP)
2419   #include <petsc/private/deviceimpl.h>
2420 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B)
2421 {
2422   PetscFunctionBegin;
2423   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
2424   PetscCall(MatCreate_MPIDense(B));
2425   PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B));
2426   PetscFunctionReturn(0);
2427 }
2428 #endif
2429 
2430 /*MC
2431    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2432 
2433    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2434    and `MATMPIDENSE` otherwise.
2435 
2436    Options Database Keys:
2437 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
2438 
2439   Level: beginner
2440 
2441 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
2442 M*/
2443 
2444 /*MC
2445    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2448 
2449    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2450    and `MATMPIDENSECUDA` otherwise.
2451 
2452    Options Database Keys:
2453 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
2454 
2455   Level: beginner
2456 
2457 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2458 M*/
2459 
2460 /*MC
2461    MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs.
2462 
2463    This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator,
2464    and `MATMPIDENSEHIP` otherwise.
2465 
2466    Options Database Keys:
2467 . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()`
2468 
2469   Level: beginner
2470 
2471 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2472 M*/
2473 
2474 /*@C
2475    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2476 
2477    Collective
2478 
2479    Input Parameters:
2480 +  B - the matrix
2481 -  data - optional location of matrix data.  Set data=NULL for PETSc
2482    to control all matrix memory allocation.
2483 
2484    Notes:
2485    The dense format is fully compatible with standard Fortran 77
2486    storage by columns.
2487 
2488    The data input variable is intended primarily for Fortran programmers
2489    who wish to allocate their own matrix memory space.  Most users should
2490    set data=NULL.
2491 
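   Example (a minimal sketch of the common case, in which PETSc manages the storage; the sizes are illustrative):
.vb
   Mat B;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, PETSC_DECIDE, PETSC_DECIDE, 100, 50));
   PetscCall(MatSetType(B, MATMPIDENSE));
   PetscCall(MatMPIDenseSetPreallocation(B, NULL));
.ve
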
2492    Level: intermediate
2493 
2494 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2495 @*/
2496 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
2497 {
2498   PetscFunctionBegin;
2499   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2500   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 /*@
2505    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2506    array provided by the user. This is useful to avoid copying an array
2507    into a matrix
2508 
2509    Not Collective
2510 
2511    Input Parameters:
2512 +  mat - the matrix
2513 -  array - the array in column major order
2514 
2515    Note:
2516    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2517    freed when the matrix is destroyed.
2518 
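   Example (a minimal sketch of the place/reset pattern; the local sizes m and n are illustrative):
.vb
   PetscScalar *myarray;
   PetscCall(PetscMalloc1(m * n, &myarray)); // column major, m local rows, n columns
   PetscCall(MatDensePlaceArray(mat, myarray));
   // ... operate on mat ...
   PetscCall(MatDenseResetArray(mat));
   PetscCall(PetscFree(myarray)); // ownership stays with the caller
.ve
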
2519    Level: developer
2520 
2521 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2522           `MatDenseReplaceArray()`
2523 @*/
2524 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
2525 {
2526   PetscFunctionBegin;
2527   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2528   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2529   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2530 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2531   mat->offloadmask = PETSC_OFFLOAD_CPU;
2532 #endif
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 /*@
2537    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
2538 
2539    Not Collective
2540 
2541    Input Parameter:
2542 .  mat - the matrix
2543 
2544    Note:
2545    You can only call this after a call to `MatDensePlaceArray()`
2546 
2547    Level: developer
2548 
2549 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2550 @*/
2551 PetscErrorCode MatDenseResetArray(Mat mat)
2552 {
2553   PetscFunctionBegin;
2554   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2555   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2556   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 /*@
2561    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2562    array provided by the user. This is useful to avoid copying an array
2563    into a matrix
2564 
2565    Not Collective
2566 
2567    Input Parameters:
2568 +  mat - the matrix
2569 -  array - the array in column major order
2570 
2571    Note:
2572    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2573    freed by the user. It will be freed when the matrix is destroyed.
2574 
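   Example (a minimal sketch; m and n are illustrative local sizes, and note the ownership transfer):
.vb
   PetscScalar *newarray;
   PetscCall(PetscMalloc1(m * n, &newarray)); // must come from PetscMalloc()
   PetscCall(MatDenseReplaceArray(mat, newarray));
   // do NOT free newarray; the matrix now owns it
.ve
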
2575    Level: developer
2576 
2577 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2578 @*/
2579 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2580 {
2581   PetscFunctionBegin;
2582   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2583   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2584   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2585 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2586   mat->offloadmask = PETSC_OFFLOAD_CPU;
2587 #endif
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #if defined(PETSC_HAVE_CUDA)
2592 /*@C
2593    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2594    array provided by the user. This is useful to avoid copying an array
2595    into a matrix
2596 
2597    Not Collective
2598 
2599    Input Parameters:
2600 +  mat - the matrix
2601 -  array - the array in column major order
2602 
2603    Note:
2604    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2605    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2606 
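   Example (a minimal sketch; assumes m local rows and n columns, and that PETSc was configured with CUDA):
.vb
   PetscScalar *d_array;
   PetscCallCUDA(cudaMalloc((void **)&d_array, m * n * sizeof(PetscScalar)));
   PetscCall(MatDenseCUDAPlaceArray(mat, d_array));
   // ... operate on mat ...
   PetscCall(MatDenseCUDAResetArray(mat));
   PetscCallCUDA(cudaFree(d_array)); // ownership stays with the caller
.ve
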
2607    Level: developer
2608 
2609 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2610 @*/
2611 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2612 {
2613   PetscFunctionBegin;
2614   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2615   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2616   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2617   mat->offloadmask = PETSC_OFFLOAD_GPU;
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 /*@C
2622    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`
2623 
2624    Not Collective
2625 
2626    Input Parameter:
2627 .  mat - the matrix
2628 
2629    Note:
2630    You can only call this after a call to `MatDenseCUDAPlaceArray()`
2631 
2632    Level: developer
2633 
2634 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2635 @*/
2636 PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2637 {
2638   PetscFunctionBegin;
2639   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2640   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2641   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 /*@C
2646    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2647    array provided by the user. This is useful to avoid copying an array
2648    into a matrix
2649 
2650    Not Collective
2651 
2652    Input Parameters:
2653 +  mat - the matrix
2654 -  array - the array in column major order
2655 
2656    Note:
2657    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2658    The memory passed in CANNOT be freed by the user. It will be freed
2659    when the matrix is destroyed. The array should respect the matrix leading dimension.
2660 
2661    Level: developer
2662 
2663 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2664 @*/
2665 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2666 {
2667   PetscFunctionBegin;
2668   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2669   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2670   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2671   mat->offloadmask = PETSC_OFFLOAD_GPU;
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 /*@C
2676    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2677 
2678    Not Collective
2679 
2680    Input Parameter:
2681 .  A - the matrix
2682 
2683    Output Parameter:
2684 .  array - the GPU array in column major order
2685 
2686    Notes:
2687    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
2688 
2689    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2690 
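   Example (a minimal sketch of the get/restore pattern; the kernel stands in for any device code that overwrites the whole buffer):
.vb
   PetscScalar *d_a;
   PetscCall(MatDenseCUDAGetArrayWrite(A, &d_a));
   // launch a user kernel that fills d_a, using the leading dimension from MatDenseGetLDA()
   PetscCall(MatDenseCUDARestoreArrayWrite(A, &d_a));
.ve
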
2691    Level: developer
2692 
2693 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2694 @*/
2695 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2696 {
2697   PetscFunctionBegin;
2698   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2699   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2700   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 /*@C
2705    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2706 
2707    Not Collective
2708 
2709    Input Parameters:
2710 +  A - the matrix
2711 -  array - the GPU array in column major order
2712 
2713    Level: developer
2714 
2715 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2716 @*/
2717 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2718 {
2719   PetscFunctionBegin;
2720   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2721   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2722   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2723   A->offloadmask = PETSC_OFFLOAD_GPU;
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 /*@C
2728    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2729 
2730    Not Collective
2731 
2732    Input Parameter:
2733 .  A - the matrix
2734 
2735    Output Parameter:
2736 .  array - the GPU array in column major order
2737 
2738    Note:
2739    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2740 
2741    Level: developer
2742 
2743 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2744 @*/
2745 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2746 {
2747   PetscFunctionBegin;
2748   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2749   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 /*@C
2754    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2755 
2756    Not Collective
2757 
2758    Input Parameters:
2759 +  A - the matrix
2760 -  array - the GPU array in column major order
2761 
2765    Level: developer
2766 
2767 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2768 @*/
2769 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2770 {
2771   PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2772   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 /*@C
2777    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2778 
2779    Not Collective
2780 
2781    Input Parameter:
2782 .  A - the matrix
2783 
2784    Output Parameter:
2785 .  array - the GPU array in column major order
2786 
2787    Note:
2788    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2789 
2790    Level: developer
2791 
2792 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2793 @*/
2794 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2795 {
2796   PetscFunctionBegin;
2797   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2798   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2799   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 /*@C
2804    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2805 
2806    Not Collective
2807 
2808    Input Parameters:
2809 +  A - the matrix
2810 -  array - the GPU array in column major order
2811 
2812    Level: developer
2813 
2814 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2815 @*/
2816 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2817 {
2818   PetscFunctionBegin;
2819   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2820   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2821   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2822   A->offloadmask = PETSC_OFFLOAD_GPU;
2823   PetscFunctionReturn(0);
2824 }
2825 #endif
2826 
2827 #if defined(PETSC_HAVE_HIP)
2828 /*@C
2829    MatDenseHIPPlaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
2830    array provided by the user. This is useful to avoid copying an array
2831    into a matrix
2832 
2833    Not Collective
2834 
2835    Input Parameters:
2836 +  mat - the matrix
2837 -  array - the array in column major order
2838 
2839    Note:
2840    You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be
2841    freed when the matrix is destroyed. The array must have been allocated with hipMalloc().
2842 
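   Example (a minimal sketch; assumes m local rows and n columns, and that PETSc was configured with HIP):
.vb
   PetscScalar *d_array;
   PetscCallHIP(hipMalloc((void **)&d_array, m * n * sizeof(PetscScalar)));
   PetscCall(MatDenseHIPPlaceArray(mat, d_array));
   // ... operate on mat ...
   PetscCall(MatDenseHIPResetArray(mat));
   PetscCallHIP(hipFree(d_array)); // ownership stays with the caller
.ve
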
2843    Level: developer
2844 
2845 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPResetArray()`, `MatDenseHIPReplaceArray()`
2846 @*/
2847 PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
2848 {
2849   PetscFunctionBegin;
2850   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2851   PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2852   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2853   mat->offloadmask = PETSC_OFFLOAD_GPU;
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 /*@C
2858    MatDenseHIPResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseHIPPlaceArray()`
2859 
2860    Not Collective
2861 
2862    Input Parameter:
2863 .  mat - the matrix
2864 
2865    Note:
2866    You can only call this after a call to `MatDenseHIPPlaceArray()`
2867 
2868    Level: developer
2869 
2870 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`
2872 @*/
2873 PetscErrorCode MatDenseHIPResetArray(Mat mat)
2874 {
2875   PetscFunctionBegin;
2876   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2877   PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
2878   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 /*@C
2883    MatDenseHIPReplaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
2884    array provided by the user. This is useful to avoid copying an array
2885    into a matrix
2886 
2887    Not Collective
2888 
2889    Input Parameters:
2890 +  mat - the matrix
2891 -  array - the array in column major order
2892 
2893    Note:
2894    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2895    The memory passed in CANNOT be freed by the user. It will be freed
2896    when the matrix is destroyed. The array should respect the matrix leading dimension.
2897 
2898    Level: developer
2899 
2900 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`, `MatDenseHIPResetArray()`
2901 @*/
2902 PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
2903 {
2904   PetscFunctionBegin;
2905   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2906   PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2907   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2908   mat->offloadmask = PETSC_OFFLOAD_GPU;
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 /*@C
2913    MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a `MATDENSEHIP` matrix.
2914 
2915    Not Collective
2916 
2917    Input Parameter:
2918 .  A - the matrix
2919 
2920    Output Parameter:
2921 .  array - the GPU array in column major order
2922 
2923    Notes:
2924    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`. The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed.
2925 
2926    Level: developer
2927 
2928 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArrayRead()`
2929 @*/
2930 PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
2931 {
2932   PetscFunctionBegin;
2933   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2934   PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2935   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 /*@C
2940    MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArrayWrite()`.
2941 
2942    Not Collective
2943 
2944    Input Parameters:
2945 +  A - the matrix
2946 -  array - the GPU array in column major order
2947 
2950    Level: developer
2951 
2952 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
2953 @*/
2954 PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
2955 {
2956   PetscFunctionBegin;
2957   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2958   PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2959   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2960   A->offloadmask = PETSC_OFFLOAD_GPU;
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 /*@C
2965    MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a `MATDENSEHIP` matrix. The array must be restored with `MatDenseHIPRestoreArrayRead()` when no longer needed.
2966 
2967    Not Collective
2968 
2969    Input Parameter:
2970 .  A - the matrix
2971 
2972    Output Parameter:
2973 .  array - the GPU array in column major order
2974 
2975    Note:
2976    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`.
2977 
2978    Level: developer
2979 
2980 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
2981 @*/
2982 PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
2983 {
2984   PetscFunctionBegin;
2985   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
2986   PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 /*@C
2991    MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with a call to `MatDenseHIPGetArrayRead()`.
2992 
2993    Not Collective
2994 
2995    Input Parameters:
2996 +  A - the matrix
2997 -  array - the GPU array in column major order
2998 
3002    Level: developer
3003 
3004 .seealso: MatDenseHIPGetArray(), MatDenseHIPRestoreArray(), MatDenseHIPRestoreArrayWrite(), MatDenseHIPGetArrayWrite(), MatDenseHIPGetArrayRead()
3005 @*/
3006 PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
3007 {
3008   PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3009   PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 /*@C
3014    MatDenseHIPGetArray - Provides access to the HIP buffer inside a `MATDENSEHIP` matrix. The array must be restored with `MatDenseHIPRestoreArray()` when no longer needed.
3015 
3016    Not Collective
3017 
3018    Input Parameter:
3019 .  A - the matrix
3020 
3021    Output Parameter:
3022 .  array - the GPU array in column major order
3023 
3024    Note:
3025    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseHIPGetArrayWrite()`. For read-only access, use `MatDenseHIPGetArrayRead()`.
3026 
3027    Level: developer
3028 
3029 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
3030 @*/
3031 PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
3032 {
3033   PetscFunctionBegin;
3034   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3035   PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
3036   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 /*@C
3041    MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArray()`.
3042 
3043    Not Collective
3044 
3045    Input Parameters:
3046 +  A - the matrix
3047 -  array - the GPU array in column major order
3048 
3051    Level: developer
3052 
3053 .seealso: `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
3054 @*/
3055 PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
3056 {
3057   PetscFunctionBegin;
3058   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
3059   PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
3060   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3061   A->offloadmask = PETSC_OFFLOAD_GPU;
3062   PetscFunctionReturn(0);
3063 }
3064 #endif
3065 
3066 /*@C
3067    MatCreateDense - Creates a matrix in `MATDENSE` format.
3068 
3069    Collective
3070 
3071    Input Parameters:
3072 +  comm - MPI communicator
3073 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3074 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3075 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
3076 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
3077 -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
3078    to control all matrix memory allocation.
3079 
3080    Output Parameter:
3081 .  A - the matrix
3082 
3083    Notes:
3084    The dense format is fully compatible with standard Fortran 77
3085    storage by columns.
3086 
3087    Although local portions of the matrix are stored in column-major
3088    order, the matrix is partitioned across MPI ranks by row.
3089 
3090    The data input variable is intended primarily for Fortran programmers
3091    who wish to allocate their own matrix memory space.  Most users should
3092    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
3093 
3094    The user MUST specify either the local or global matrix dimensions
3095    (possibly both).
3096 
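   Example (a minimal sketch; creates a 100 x 50 matrix with PETSc-chosen local sizes and PETSc-managed storage):
.vb
   Mat A;
   PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
   // ... set values, assemble, use A ...
   PetscCall(MatDestroy(&A));
.ve
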
3097    Level: intermediate
3098 
3099 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
3100 @*/
3101 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3102 {
3103   PetscFunctionBegin;
3104   PetscCall(MatCreate(comm, A));
3105   PetscCall(MatSetSizes(*A, m, n, M, N));
3106   PetscCall(MatSetType(*A, MATDENSE));
3107   PetscCall(MatSeqDenseSetPreallocation(*A, data));
3108   PetscCall(MatMPIDenseSetPreallocation(*A, data));
3109   PetscFunctionReturn(0);
3110 }
3111 
3112 #if defined(PETSC_HAVE_CUDA)
3113 /*@C
3114    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
3115 
3116    Collective
3117 
3118    Input Parameters:
3119 +  comm - MPI communicator
3120 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3121 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3122 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
3123 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
3124 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
3125    to control matrix memory allocation.
3126 
3127    Output Parameter:
3128 .  A - the matrix
3129 
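   Note:
   Usage mirrors `MatCreateDense()`; a minimal sketch with PETSc-managed device storage:
.vb
   Mat A;
   PetscCall(MatCreateDenseCUDA(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
.ve
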
3130    Level: intermediate
3131 
3132 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
3133 @*/
3134 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3135 {
3136   PetscFunctionBegin;
3137   PetscCall(MatCreate(comm, A));
3138   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3139   PetscCall(MatSetSizes(*A, m, n, M, N));
3140   PetscCall(MatSetType(*A, MATDENSECUDA));
3141   PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
3142   PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
3143   PetscFunctionReturn(0);
3144 }
3145 #endif
3146 
3147 #if defined(PETSC_HAVE_HIP)
3148 /*@C
3149    MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.
3150 
3151    Collective
3152 
3153    Input Parameters:
3154 +  comm - MPI communicator
3155 .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
3156 .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
3157 .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if m is given)
3158 .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if n is given)
3159 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
3160    to control matrix memory allocation.
3161 
3162    Output Parameter:
3163 .  A - the matrix
3164 
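   Note:
   Usage mirrors `MatCreateDense()`; a minimal sketch with PETSc-managed device storage:
.vb
   Mat A;
   PetscCall(MatCreateDenseHIP(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
.ve
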
3165    Level: intermediate
3166 
3167 .seealso: `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
3168 @*/
3169 PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3170 {
3171   PetscFunctionBegin;
3172   PetscCall(MatCreate(comm, A));
3173   PetscValidLogicalCollectiveBool(*A, !!data, 6);
3174   PetscCall(MatSetSizes(*A, m, n, M, N));
3175   PetscCall(MatSetType(*A, MATDENSEHIP));
3176   PetscCall(MatSeqDenseHIPSetPreallocation(*A, data));
3177   PetscCall(MatMPIDenseHIPSetPreallocation(*A, data));
3178   PetscFunctionReturn(0);
3179 }
3180 #endif
3181 
3182 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
3183 {
3184   Mat           mat;
3185   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
3186 
3187   PetscFunctionBegin;
3188   *newmat = NULL;
3189   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
3190   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3191   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
3192   a = (Mat_MPIDense *)mat->data;
3193 
3194   mat->factortype   = A->factortype;
3195   mat->assembled    = PETSC_TRUE;
3196   mat->preallocated = PETSC_TRUE;
3197 
3198   mat->insertmode = NOT_SET_VALUES;
3199   a->donotstash   = oldmat->donotstash;
3200 
3201   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
3202   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
3203 
3204   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3205 
3206   *newmat = mat;
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
3211 {
3212   PetscBool isbinary;
3213 #if defined(PETSC_HAVE_HDF5)
3214   PetscBool ishdf5;
3215 #endif
3216 
3217   PetscFunctionBegin;
3218   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3219   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3220   /* force binary viewer to load .info file if it has not yet done so */
3221   PetscCall(PetscViewerSetUp(viewer));
3222   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3223 #if defined(PETSC_HAVE_HDF5)
3224   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3225 #endif
3226   if (isbinary) {
3227     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
3228 #if defined(PETSC_HAVE_HDF5)
3229   } else if (ishdf5) {
3230     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
3231 #endif
3232   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3233   PetscFunctionReturn(0);
3234 }
3235 
3236 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
3237 {
3238   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
3239   Mat           a, b;
3240 
3241   PetscFunctionBegin;
3242   a = matA->A;
3243   b = matB->A;
3244   PetscCall(MatEqual(a, b, flag));
3245   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
3250 {
3251   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
3252 
3253   PetscFunctionBegin;
3254   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
3255   PetscCall(MatDestroy(&atb->atb));
3256   PetscCall(PetscFree(atb));
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
3261 {
3262   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
3263 
3264   PetscFunctionBegin;
3265   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
3266   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
3267   PetscCall(PetscFree(abt));
3268   PetscFunctionReturn(0);
3269 }
3270 
3271 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3272 {
3273   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3274   Mat_TransMatMultDense *atb;
3275   MPI_Comm               comm;
3276   PetscMPIInt            size, *recvcounts;
3277   PetscScalar           *carray, *sendbuf;
3278   const PetscScalar     *atbarray;
3279   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
3280   const PetscInt        *ranges;
3281 
3282   PetscFunctionBegin;
3283   MatCheckProduct(C, 3);
3284   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3285   atb        = (Mat_TransMatMultDense *)C->product->data;
3286   recvcounts = atb->recvcounts;
3287   sendbuf    = atb->sendbuf;
3288 
3289   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3290   PetscCallMPI(MPI_Comm_size(comm, &size));
3291 
3292   /* compute atbarray = aseq^T * bseq */
3293   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
3294 
3295   PetscCall(MatGetOwnershipRanges(C, &ranges));
3296 
3297   /* arrange atbarray into sendbuf */
3298   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
3299   PetscCall(MatDenseGetLDA(atb->atb, &lda));
3300   for (proc = 0, k = 0; proc < size; proc++) {
3301     for (j = 0; j < cN; j++) {
3302       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
3303     }
3304   }
3305   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
3306 
3307   /* sum all atbarray to local values of C */
3308   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
3309   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
3310   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
3311   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3312   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3317 {
3318   MPI_Comm               comm;
3319   PetscMPIInt            size;
3320   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
3321   Mat_TransMatMultDense *atb;
3322   PetscBool              cisdense = PETSC_FALSE;
3323   PetscInt               i;
3324   const PetscInt        *ranges;
3325 
3326   PetscFunctionBegin;
3327   MatCheckProduct(C, 4);
3328   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3329   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3330   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
3331     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3332   }
3333 
3334   /* create matrix product C */
3335   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
3336   /* MATMPIDENSECUDA and MATMPIDENSEHIP are type name strings that are always defined, so one query covers all MPI dense variants */
3337   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
3342   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
3343   PetscCall(MatSetUp(C));
3344 
3345   /* create data structure for reuse C */
3346   PetscCallMPI(MPI_Comm_size(comm, &size));
3347   PetscCall(PetscNew(&atb));
3348   cM = C->rmap->N;
3349   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
3350   PetscCall(MatGetOwnershipRanges(C, &ranges));
3351   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
3352 
3353   C->product->data    = atb;
3354   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
3355   PetscFunctionReturn(0);
3356 }
3357 
3358 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3359 {
3360   MPI_Comm               comm;
3361   PetscMPIInt            i, size;
3362   PetscInt               maxRows, bufsiz;
3363   PetscMPIInt            tag;
3364   PetscInt               alg;
3365   Mat_MatTransMultDense *abt;
3366   Mat_Product           *product = C->product;
3367   PetscBool              flg;
3368 
3369   PetscFunctionBegin;
3370   MatCheckProduct(C, 4);
3371   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3372   /* check local size of A and B */
3373   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
3374 
3375   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
3376   alg = flg ? 0 : 1;
3377 
3378   /* setup matrix product C */
3379   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
3380   PetscCall(MatSetType(C, MATMPIDENSE));
3381   PetscCall(MatSetUp(C));
3382   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
3383 
3384   /* create data structure for reuse C */
3385   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3386   PetscCallMPI(MPI_Comm_size(comm, &size));
3387   PetscCall(PetscNew(&abt));
3388   abt->tag = tag;
3389   abt->alg = alg;
3390   switch (alg) {
3391   case 1: /* alg: "cyclic" */
3392     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
3393     bufsiz = A->cmap->N * maxRows;
3394     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
3395     break;
3396   default: /* alg: "allgatherv" */
3397     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
3398     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
3399     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
3400     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
3401     break;
3402   }
3403 
3404   C->product->data                = abt;
3405   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
3406   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
3411 {
3412   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3413   Mat_MatTransMultDense *abt;
3414   MPI_Comm               comm;
3415   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
3416   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
3417   PetscInt               i, cK             = A->cmap->N, k, j, bn;
3418   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3419   const PetscScalar     *av, *bv;
3420   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
3421   MPI_Request            reqs[2];
3422   const PetscInt        *ranges;
3423 
3424   PetscFunctionBegin;
3425   MatCheckProduct(C, 3);
3426   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3427   abt = (Mat_MatTransMultDense *)C->product->data;
3428   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3429   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3430   PetscCallMPI(MPI_Comm_size(comm, &size));
3431   PetscCall(MatDenseGetArrayRead(a->A, &av));
3432   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3433   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3434   PetscCall(MatDenseGetLDA(a->A, &i));
3435   PetscCall(PetscBLASIntCast(i, &alda));
3436   PetscCall(MatDenseGetLDA(b->A, &i));
3437   PetscCall(PetscBLASIntCast(i, &blda));
3438   PetscCall(MatDenseGetLDA(c->A, &i));
3439   PetscCall(PetscBLASIntCast(i, &clda));
3440   PetscCall(MatGetOwnershipRanges(B, &ranges));
3441   bn = B->rmap->n;
3442   if (blda == bn) {
3443     sendbuf = (PetscScalar *)bv;
3444   } else {
3445     sendbuf = abt->buf[0];
3446     for (k = 0, i = 0; i < cK; i++) {
3447       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
3448     }
3449   }
3450   if (size > 1) {
3451     sendto   = (rank + size - 1) % size;
3452     recvfrom = (rank + size + 1) % size;
3453   } else {
3454     sendto = recvfrom = 0;
3455   }
3456   PetscCall(PetscBLASIntCast(cK, &ck));
3457   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3458   recvisfrom = rank;
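  /* ring schedule: in step i, multiply the local rows of A with the row block of B that originated
     on process recvisfrom, while the next block circulates (each rank sends to rank-1 and receives
     from rank+1, modulo size) */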
3459   for (i = 0; i < size; i++) {
3460     /* we have finished receiving and sending, bufs can be read/modified */
3461     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
3462     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
3463 
3464     if (nextrecvisfrom != rank) {
3465       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
3466       sendsiz = cK * bn;
3467       recvsiz = cK * nextbn;
3468       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
3469       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
3470       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
3471     }
3472 
3473     /* local aseq * sendbuf^T */
3474     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
3475     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
3476 
3477     if (nextrecvisfrom != rank) {
3478       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
3479       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
3480     }
3481     bn         = nextbn;
3482     recvisfrom = nextrecvisfrom;
3483     sendbuf    = recvbuf;
3484   }
3485   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3486   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3487   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3488   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3489   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
3494 {
3495   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3496   Mat_MatTransMultDense *abt;
3497   MPI_Comm               comm;
3498   PetscMPIInt            size;
3499   PetscScalar           *cv, *sendbuf, *recvbuf;
3500   const PetscScalar     *av, *bv;
3501   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
3502   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3503   PetscBLASInt           cm, cn, ck, alda, clda;
3504 
3505   PetscFunctionBegin;
3506   MatCheckProduct(C, 3);
3507   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3508   abt = (Mat_MatTransMultDense *)C->product->data;
3509   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3510   PetscCallMPI(MPI_Comm_size(comm, &size));
3511   PetscCall(MatDenseGetArrayRead(a->A, &av));
3512   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3513   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3514   PetscCall(MatDenseGetLDA(a->A, &i));
3515   PetscCall(PetscBLASIntCast(i, &alda));
3516   PetscCall(MatDenseGetLDA(b->A, &blda));
3517   PetscCall(MatDenseGetLDA(c->A, &i));
3518   PetscCall(PetscBLASIntCast(i, &clda));
3519   /* copy transpose of B into buf[0] */
3520   bn      = B->rmap->n;
3521   sendbuf = abt->buf[0];
3522   recvbuf = abt->buf[1];
3523   for (k = 0, j = 0; j < bn; j++) {
3524     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
3525   }
3526   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3527   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
3528   PetscCall(PetscBLASIntCast(cK, &ck));
3529   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3530   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
3531   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
3532   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3534   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3535   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3536   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3541 {
3542   Mat_MatTransMultDense *abt;
3543 
3544   PetscFunctionBegin;
3545   MatCheckProduct(C, 3);
3546   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3547   abt = (Mat_MatTransMultDense *)C->product->data;
3548   switch (abt->alg) {
3549   case 1:
3550     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
3551     break;
3552   default:
3553     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
3554     break;
3555   }
3556   PetscFunctionReturn(0);
3557 }
3558 
3559 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
3560 {
3561   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
3562 
3563   PetscFunctionBegin;
3564   PetscCall(MatDestroy(&ab->Ce));
3565   PetscCall(MatDestroy(&ab->Ae));
3566   PetscCall(MatDestroy(&ab->Be));
3567   PetscCall(PetscFree(ab));
3568   PetscFunctionReturn(0);
3569 }
3570 
3571 #if defined(PETSC_HAVE_ELEMENTAL)
3572 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3573 {
3574   Mat_MatMultDense *ab;
3575 
3576   PetscFunctionBegin;
3577   MatCheckProduct(C, 3);
3578   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
3579   ab = (Mat_MatMultDense *)C->product->data;
3580   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
3581   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
3582   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
3583   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
3584   PetscFunctionReturn(0);
3585 }
3586 
3587 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3588 {
3589   Mat               Ae, Be, Ce;
3590   Mat_MatMultDense *ab;
3591 
3592   PetscFunctionBegin;
3593   MatCheckProduct(C, 4);
3594   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3595   /* check local size of A and B */
3596   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
3597     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3598   }
3599 
3600   /* create elemental matrices Ae and Be */
3601   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
3602   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
3603   PetscCall(MatSetType(Ae, MATELEMENTAL));
3604   PetscCall(MatSetUp(Ae));
3605   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
3606 
3607   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
3608   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
3609   PetscCall(MatSetType(Be, MATELEMENTAL));
3610   PetscCall(MatSetUp(Be));
3611   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
3612 
3613   /* compute symbolic Ce = Ae*Be */
3614   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
3615   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
3616 
3617   /* setup C */
3618   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3619   PetscCall(MatSetType(C, MATDENSE));
3620   PetscCall(MatSetUp(C));
3621 
3622   /* create data structure for reuse Cdense */
3623   PetscCall(PetscNew(&ab));
3624   ab->Ae = Ae;
3625   ab->Be = Be;
3626   ab->Ce = Ce;
3627 
3628   C->product->data       = ab;
3629   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
3630   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3631   PetscFunctionReturn(0);
3632 }
3633 #endif
3634 /* ----------------------------------------------- */
3635 #if defined(PETSC_HAVE_ELEMENTAL)
3636 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3637 {
3638   PetscFunctionBegin;
3639   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3640   C->ops->productsymbolic = MatProductSymbolic_AB;
3641   PetscFunctionReturn(0);
3642 }
3643 #endif
3644 
3645 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3646 {
3647   Mat_Product *product = C->product;
3648   Mat          A = product->A, B = product->B;
3649 
3650   PetscFunctionBegin;
3651   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3652     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3653   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3654   C->ops->productsymbolic          = MatProductSymbolic_AtB;
3655   PetscFunctionReturn(0);
3656 }
3657 
3658 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3659 {
3660   Mat_Product *product     = C->product;
3661   const char  *algTypes[2] = {"allgatherv", "cyclic"};
3662   PetscInt     alg, nalg = 2;
3663   PetscBool    flg = PETSC_FALSE;
3664 
3665   PetscFunctionBegin;
3666   /* Set default algorithm */
3667   alg = 0; /* default is allgatherv */
3668   PetscCall(PetscStrcmp(product->alg, "default", &flg));
3669   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3670 
3671   /* Get runtime option */
3672   if (product->api_user) {
3673     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
3674     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
3675     PetscOptionsEnd();
3676   } else {
3677     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
3678     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
3679     PetscOptionsEnd();
3680   }
3681   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3682 
3683   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3684   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3685   PetscFunctionReturn(0);
3686 }
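
/*
   Example: the algorithm chosen above can be overridden at run time with
     -mat_product_algorithm cyclic                    (MatProduct API)
     -matmattransmult_mpidense_mpidense_via cyclic    (MatMatTransposeMult() callers)
*/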
3687 
3688 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3689 {
3690   Mat_Product *product = C->product;
3691 
3692   PetscFunctionBegin;
3693   switch (product->type) {
3694 #if defined(PETSC_HAVE_ELEMENTAL)
3695   case MATPRODUCT_AB:
3696     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
3697     break;
3698 #endif
3699   case MATPRODUCT_AtB:
3700     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3701     break;
3702   case MATPRODUCT_ABt:
3703     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3704     break;
3705   default:
3706     break;
3707   }
3708   PetscFunctionReturn(0);
3709 }
3710