xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ca0d62ff0b92f3b3ec10dd93d500c979243edb4e)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4    Portions of this code are under:
5    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
6 */
7 
8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
9 #include <../src/mat/impls/aij/mpi/mpiaij.h>
10 #include <petscblaslapack.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14   matrix that represents the operator. For sequential matrices it returns itself.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22   Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscAssertPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/*
  Returns the square diagonal block of the parallel matrix as a sequential dense
  matrix. The block is created once, cached on A under the key "DiagonalBlock",
  and ALIASES the storage of the inner local matrix (no copy is made).
  The returned matrix is a borrowed reference: the cache holds the only count,
  so callers must not destroy it.
*/
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* row and column layouts must match for the diagonal block to be square */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    /* GPU inner matrices are rejected: the host-pointer aliasing below would be invalid for device storage */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* column-major storage: the diagonal block starts m*rstart entries into the local array.
       NOTE(review): this offset presumes the inner lda equals m — confirm for lda-modified matrices */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* the compose above took a reference; drop ours so the cache owns B */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/*
  Insert or add a logically dense m x n block of values. Locally owned rows go
  straight into the inner sequential matrix; off-process rows are stashed for
  communication during assembly (unless stashing is disabled).
  v is read row-major when roworiented is true, column-major otherwise.
*/
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number in the inner matrix */
      if (roworiented) {
        /* row i of the block is contiguous at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-major block: entry (i,j) lives at v[i + j*m], set one at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it and mark the matrix as needing assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/*
  Set the leading dimension of the local array. If the inner sequential matrix
  has not been created yet (preallocation not called), it is created here first
  so the LDA can be recorded; the inner type is chosen to match A's type
  (plain, CUDA, or HIP dense).
*/
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE; /* default inner type; may be overridden below */

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* the inner matrix holds all N columns of this rank's rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    /* mirror a GPU outer type with the matching sequential GPU inner type */
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285 {
286   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
287   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288   const PetscInt    *irow, *icol;
289   const PetscScalar *v;
290   PetscScalar       *bv;
291   Mat                newmat;
292   IS                 iscol_local;
293   MPI_Comm           comm_is, comm_mat;
294 
295   PetscFunctionBegin;
296   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
299 
300   PetscCall(ISAllGather(iscol, &iscol_local));
301   PetscCall(ISGetIndices(isrow, &irow));
302   PetscCall(ISGetIndices(iscol_local, &icol));
303   PetscCall(ISGetLocalSize(isrow, &nrows));
304   PetscCall(ISGetLocalSize(iscol, &ncols));
305   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
306 
307   /* No parallel redistribution currently supported! Should really check each index set
308      to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
309      original matrix! */
310 
311   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
313 
314   /* Check submatrix call */
315   if (scall == MAT_REUSE_MATRIX) {
316     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
317     /* Really need to test rows and column sizes! */
318     newmat = *B;
319   } else {
320     /* Create and fill new matrix */
321     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325   }
326 
327   /* Now extract the data pointers and do the copy, column at a time */
328   newmatd = (Mat_MPIDense *)newmat->data;
329   PetscCall(MatDenseGetArray(newmatd->A, &bv));
330   PetscCall(MatDenseGetArrayRead(mat->A, &v));
331   PetscCall(MatDenseGetLDA(mat->A, &lda));
332   for (i = 0; i < Ncols; i++) {
333     const PetscScalar *av = v + lda * icol[i];
334     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335   }
336   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
338 
339   /* Assemble the matrices so that the correct flags are set */
340   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
342 
343   /* Free work space */
344   PetscCall(ISRestoreIndices(isrow, &irow));
345   PetscCall(ISRestoreIndices(iscol_local, &icol));
346   PetscCall(ISDestroy(&iscol_local));
347   *B = newmat;
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
/*
  Finish assembly: drain the stash messages started in MatAssemblyBegin,
  inserting the received off-process values locally, then assemble the inner
  sequential matrix.
*/
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg is false once all incoming messages have been consumed */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        /* ncols = length of the run of entries sharing row value rstart */
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* assemble the local sequential matrix holding this rank's rows */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436 {
437   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438   PetscInt      i, len, *lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443   /* fix right hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     PetscCall(VecGetArrayRead(x, &xx));
449     PetscCall(VecGetArrayWrite(b, &bb));
450     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451     PetscCall(VecRestoreArrayRead(x, &xx));
452     PetscCall(VecRestoreArrayWrite(b, &bb));
453   }
454   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455   if (diag != 0.0) {
456     Vec d;
457 
458     PetscCall(MatCreateVecs(A, NULL, &d));
459     PetscCall(VecSet(d, diag));
460     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461     PetscCall(VecDestroy(&d));
462   }
463   PetscCall(PetscFree(lrows));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475   const PetscScalar *ax;
476   PetscScalar       *ay;
477   PetscMemType       axmtype, aymtype;
478 
479   PetscFunctionBegin;
480   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
491 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494   const PetscScalar *ax;
495   PetscScalar       *ay;
496   PetscMemType       axmtype, aymtype;
497 
498   PetscFunctionBegin;
499   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
511 {
512   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
513   const PetscScalar *ax;
514   PetscScalar       *ay;
515   PetscMemType       axmtype, aymtype;
516 
517   PetscFunctionBegin;
518   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
519   PetscCall(VecSet(yy, 0.0));
520   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
521   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
522   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
523   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
524   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
525   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
526   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
531 {
532   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
533   const PetscScalar *ax;
534   PetscScalar       *ay;
535   PetscMemType       axmtype, aymtype;
536 
537   PetscFunctionBegin;
538   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
539   PetscCall(VecCopy(yy, zz));
540   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
541   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
542   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
543   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
544   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
545   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
546   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551 {
552   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
554   PetscScalar       *x;
555   const PetscScalar *av;
556 
557   PetscFunctionBegin;
558   PetscCall(VecGetArray(v, &x));
559   PetscCall(VecGetSize(v, &ng));
560   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
561   PetscCall(VecGetLocalSize(v, &nl));
562   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
563   radd = A->rmap->rstart * m;
564   PetscCall(MatDenseGetArrayRead(a->A, &av));
565   PetscCall(MatDenseGetLDA(a->A, &lda));
566   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568   PetscCall(PetscArrayzero(x + i, nl - i));
569   PetscCall(VecRestoreArray(v, &x));
570   PetscFunctionReturn(PETSC_SUCCESS);
571 }
572 
/*
  Destroy the parallel dense matrix: free the stash, the inner sequential
  matrix, communication objects and cached views, then detach every composed
  method and the cached diagonal block so the object can be retyped cleanly.
*/
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying with an outstanding column-vector or submatrix view is a usage error */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the inner matrix, the multiply work vector/SF, and cached column views */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach every composed method installed by MatCreate_MPIDense and friends */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
667 
668 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
669 
670 #include <petscdraw.h>
671 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
672 {
673   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
674   PetscMPIInt       rank;
675   PetscViewerType   vtype;
676   PetscBool         iascii, isdraw;
677   PetscViewer       sviewer;
678   PetscViewerFormat format;
679 
680   PetscFunctionBegin;
681   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
683   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
684   if (iascii) {
685     PetscCall(PetscViewerGetType(viewer, &vtype));
686     PetscCall(PetscViewerGetFormat(viewer, &format));
687     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
688       MatInfo info;
689       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
690       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
691       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
692                                                    (PetscInt)info.memory));
693       PetscCall(PetscViewerFlush(viewer));
694       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
695       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
696       PetscFunctionReturn(PETSC_SUCCESS);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       PetscFunctionReturn(PETSC_SUCCESS);
699     }
700   } else if (isdraw) {
701     PetscDraw draw;
702     PetscBool isnull;
703 
704     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
705     PetscCall(PetscDrawIsNull(draw, &isnull));
706     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
707   }
708 
709   {
710     /* assemble the entire matrix onto first processor. */
711     Mat          A;
712     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
713     PetscInt    *cols;
714     PetscScalar *vals;
715 
716     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
717     if (rank == 0) {
718       PetscCall(MatSetSizes(A, M, N, M, N));
719     } else {
720       PetscCall(MatSetSizes(A, 0, 0, M, N));
721     }
722     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
723     PetscCall(MatSetType(A, MATMPIDENSE));
724     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
725 
726     /* Copy the matrix ... This isn't the most efficient means,
727        but it's quick for now */
728     A->insertmode = INSERT_VALUES;
729 
730     row = mat->rmap->rstart;
731     m   = mdn->A->rmap->n;
732     for (i = 0; i < m; i++) {
733       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
734       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
735       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
736       row++;
737     }
738 
739     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
740     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
741     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
742     if (rank == 0) {
743       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
744       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
745     }
746     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
747     PetscCall(PetscViewerFlush(viewer));
748     PetscCall(MatDestroy(&A));
749   }
750   PetscFunctionReturn(PETSC_SUCCESS);
751 }
752 
753 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
754 {
755   PetscBool iascii, isbinary, isdraw, issocket;
756 
757   PetscFunctionBegin;
758   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
761   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
762 
763   if (iascii || issocket || isdraw) {
764     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
765   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
766   PetscFunctionReturn(PETSC_SUCCESS);
767 }
768 
769 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
770 {
771   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
772   Mat            mdn = mat->A;
773   PetscLogDouble isend[5], irecv[5];
774 
775   PetscFunctionBegin;
776   info->block_size = 1.0;
777 
778   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
779 
780   isend[0] = info->nz_used;
781   isend[1] = info->nz_allocated;
782   isend[2] = info->nz_unneeded;
783   isend[3] = info->memory;
784   isend[4] = info->mallocs;
785   if (flag == MAT_LOCAL) {
786     info->nz_used      = isend[0];
787     info->nz_allocated = isend[1];
788     info->nz_unneeded  = isend[2];
789     info->memory       = isend[3];
790     info->mallocs      = isend[4];
791   } else if (flag == MAT_GLOBAL_MAX) {
792     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
793 
794     info->nz_used      = irecv[0];
795     info->nz_allocated = irecv[1];
796     info->nz_unneeded  = irecv[2];
797     info->memory       = irecv[3];
798     info->mallocs      = irecv[4];
799   } else if (flag == MAT_GLOBAL_SUM) {
800     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
801 
802     info->nz_used      = irecv[0];
803     info->nz_allocated = irecv[1];
804     info->nz_unneeded  = irecv[2];
805     info->memory       = irecv[3];
806     info->mallocs      = irecv[4];
807   }
808   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
809   info->fill_ratio_needed = 0;
810   info->factor_mallocs    = 0;
811   PetscFunctionReturn(PETSC_SUCCESS);
812 }
813 
/* Applies a MatOption to the parallel dense matrix; most options are either forwarded
   to the local sequential block, recorded on the MPI wrapper, or logged as ignored */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the inner sequential matrix */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally (affects MatSetValues interpretation) and forwarded */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* meaningless for a dense format; note it and move on */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* suppresses stashing/communication of off-process values during assembly */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
857 
/* Scales A <- diag(ll) * A * diag(rr); either vector may be NULL to skip that side.
   The right-scaling vector must be scattered to this rank's column space first. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    /* left scaling: every entry of local row i is multiplied by l[i] */
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i: column-major storage, so consecutive columns are lda apart */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    /* right scaling: broadcast rr into the local work vector lvec so each rank
       has the scale factor for every column it stores */
    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous in memory */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
906 
907 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
908 {
909   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
910   PetscInt           i, j;
911   PetscMPIInt        size;
912   PetscReal          sum = 0.0;
913   const PetscScalar *av, *v;
914 
915   PetscFunctionBegin;
916   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
917   v = av;
918   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
919   if (size == 1) {
920     PetscCall(MatNorm(mdn->A, type, nrm));
921   } else {
922     if (type == NORM_FROBENIUS) {
923       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
924         sum += PetscRealPart(PetscConj(*v) * (*v));
925         v++;
926       }
927       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
928       *nrm = PetscSqrtReal(*nrm);
929       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
930     } else if (type == NORM_1) {
931       PetscReal *tmp, *tmp2;
932       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
933       *nrm = 0.0;
934       v    = av;
935       for (j = 0; j < mdn->A->cmap->n; j++) {
936         for (i = 0; i < mdn->A->rmap->n; i++) {
937           tmp[j] += PetscAbsScalar(*v);
938           v++;
939         }
940       }
941       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
942       for (j = 0; j < A->cmap->N; j++) {
943         if (tmp2[j] > *nrm) *nrm = tmp2[j];
944       }
945       PetscCall(PetscFree2(tmp, tmp2));
946       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
947     } else if (type == NORM_INFINITY) { /* max row norm */
948       PetscReal ntemp;
949       PetscCall(MatNorm(mdn->A, type, &ntemp));
950       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
951     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
952   }
953   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
/* Builds the transpose of A; for MAT_INPLACE_MATRIX the result replaces A via MatHeaderMerge() */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the target with row/column (local and global) dimensions swapped */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* global indices of this rank's rows become column indices in B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* insert each local column of A as row j of B; columns are lda apart in memory */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: move B's contents into A's header, destroying the temporary */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
995 
996 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
997 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
998 
999 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1000 {
1001   PetscFunctionBegin;
1002   PetscCall(PetscLayoutSetUp(A->rmap));
1003   PetscCall(PetscLayoutSetUp(A->cmap));
1004   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1009 {
1010   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1011 
1012   PetscFunctionBegin;
1013   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1014   PetscFunctionReturn(PETSC_SUCCESS);
1015 }
1016 
1017 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1018 {
1019   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1020 
1021   PetscFunctionBegin;
1022   PetscCall(MatConjugate(a->A));
1023   PetscFunctionReturn(PETSC_SUCCESS);
1024 }
1025 
1026 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1027 {
1028   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1029 
1030   PetscFunctionBegin;
1031   PetscCall(MatRealPart(a->A));
1032   PetscFunctionReturn(PETSC_SUCCESS);
1033 }
1034 
1035 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1036 {
1037   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1038 
1039   PetscFunctionBegin;
1040   PetscCall(MatImaginaryPart(a->A));
1041   PetscFunctionReturn(PETSC_SUCCESS);
1042 }
1043 
1044 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1045 {
1046   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1047 
1048   PetscFunctionBegin;
1049   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1050   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1051   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1056 
1057 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1058 {
1059   PetscInt      i, m, n;
1060   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1061   PetscReal    *work;
1062 
1063   PetscFunctionBegin;
1064   PetscCall(MatGetSize(A, &m, &n));
1065   PetscCall(PetscMalloc1(n, &work));
1066   if (type == REDUCTION_MEAN_REALPART) {
1067     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1068   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1069     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1070   } else {
1071     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1072   }
1073   if (type == NORM_2) {
1074     for (i = 0; i < n; i++) work[i] *= work[i];
1075   }
1076   if (type == NORM_INFINITY) {
1077     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1078   } else {
1079     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1080   }
1081   PetscCall(PetscFree(work));
1082   if (type == NORM_2) {
1083     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1084   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1085     for (i = 0; i < n; i++) reductions[i] /= m;
1086   }
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1091 {
1092   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1093 
1094   PetscFunctionBegin;
1095   PetscCall(MatSetRandom(d->A, rctx));
1096 #if defined(PETSC_HAVE_DEVICE)
1097   x->offloadmask = d->A->offloadmask;
1098 #endif
1099   PetscFunctionReturn(PETSC_SUCCESS);
1100 }
1101 
/* A dense matrix stores every entry explicitly, so a diagonal entry can never be missing */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1108 
1109 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1110 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1111 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1112 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1113 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1114 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1115 
/* Positional operation table installed on every MATMPIDENSE matrix; the index comments
   track slot numbers in struct _MatOps. NULL slots are operations this type does not
   provide directly. Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1268 
/*
  MatMPIDenseSetPreallocation_MPIDense - Creates (if needed) and preallocates the inner
  sequential dense matrix, sized as this rank's rows by ALL global columns.

  If data is non-NULL it is handed to MatSeqDenseSetPreallocation() as the storage array
  (see that routine for the ownership semantics).
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block: this rank's rows by the full global column space */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* match the inner sequential type to the outer (host/CUDA/HIP) dense type */
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1301 
1302 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1303 {
1304   Mat B, C;
1305 
1306   PetscFunctionBegin;
1307   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1308   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1309   PetscCall(MatDestroy(&C));
1310   if (reuse == MAT_REUSE_MATRIX) {
1311     C = *newmat;
1312   } else C = NULL;
1313   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1314   PetscCall(MatDestroy(&B));
1315   if (reuse == MAT_INPLACE_MATRIX) {
1316     PetscCall(MatHeaderReplace(A, &C));
1317   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1318   PetscFunctionReturn(PETSC_SUCCESS);
1319 }
1320 
1321 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1322 {
1323   Mat B, C;
1324 
1325   PetscFunctionBegin;
1326   PetscCall(MatDenseGetLocalMatrix(A, &C));
1327   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1328   if (reuse == MAT_REUSE_MATRIX) {
1329     C = *newmat;
1330   } else C = NULL;
1331   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1332   PetscCall(MatDestroy(&B));
1333   if (reuse == MAT_INPLACE_MATRIX) {
1334     PetscCall(MatHeaderReplace(A, &C));
1335   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1336   PetscFunctionReturn(PETSC_SUCCESS);
1337 }
1338 
1339 #if defined(PETSC_HAVE_ELEMENTAL)
/* Converts a MATMPIDENSE matrix to MATELEMENTAL by inserting all locally owned rows */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing Elemental matrix; zero it since values are ADDed below */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* index arrays: all global columns, and this rank's global row range */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1377 #endif
1378 
1379 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1380 {
1381   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1382 
1383   PetscFunctionBegin;
1384   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1385   PetscFunctionReturn(PETSC_SUCCESS);
1386 }
1387 
1388 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1389 {
1390   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1391 
1392   PetscFunctionBegin;
1393   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1394   PetscFunctionReturn(PETSC_SUCCESS);
1395 }
1396 
1397 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1398 {
1399   Mat_MPIDense *mat;
1400   PetscInt      m, nloc, N;
1401 
1402   PetscFunctionBegin;
1403   PetscCall(MatGetSize(inmat, &m, &N));
1404   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1405   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1406     PetscInt sum;
1407 
1408     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1409     /* Check sum(n) = N */
1410     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1411     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1412 
1413     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1414     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1415   }
1416 
1417   /* numeric phase */
1418   mat = (Mat_MPIDense *)(*outmat)->data;
1419   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1420   PetscFunctionReturn(PETSC_SUCCESS);
1421 }
1422 
1423 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1424 {
1425   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1426   PetscInt      lda;
1427 
1428   PetscFunctionBegin;
1429   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1430   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1431   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1432   a->vecinuse = col + 1;
1433   PetscCall(MatDenseGetLDA(a->A, &lda));
1434   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1435   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1436   *v = a->cvec;
1437   PetscFunctionReturn(PETSC_SUCCESS);
1438 }
1439 
1440 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1441 {
1442   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1443 
1444   PetscFunctionBegin;
1445   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1446   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1447   a->vecinuse = 0;
1448   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1449   PetscCall(VecResetArray(a->cvec));
1450   if (v) *v = NULL;
1451   PetscFunctionReturn(PETSC_SUCCESS);
1452 }
1453 
1454 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1455 {
1456   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1457   PetscInt      lda;
1458 
1459   PetscFunctionBegin;
1460   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1461   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1462   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1463   a->vecinuse = col + 1;
1464   PetscCall(MatDenseGetLDA(a->A, &lda));
1465   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1466   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1467   PetscCall(VecLockReadPush(a->cvec));
1468   *v = a->cvec;
1469   PetscFunctionReturn(PETSC_SUCCESS);
1470 }
1471 
1472 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1473 {
1474   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1475 
1476   PetscFunctionBegin;
1477   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1478   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1479   a->vecinuse = 0;
1480   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1481   PetscCall(VecLockReadPop(a->cvec));
1482   PetscCall(VecResetArray(a->cvec));
1483   if (v) *v = NULL;
1484   PetscFunctionReturn(PETSC_SUCCESS);
1485 }
1486 
1487 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1488 {
1489   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1490   PetscInt      lda;
1491 
1492   PetscFunctionBegin;
1493   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1494   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1495   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1496   a->vecinuse = col + 1;
1497   PetscCall(MatDenseGetLDA(a->A, &lda));
1498   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1499   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1500   *v = a->cvec;
1501   PetscFunctionReturn(PETSC_SUCCESS);
1502 }
1503 
1504 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1505 {
1506   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1507 
1508   PetscFunctionBegin;
1509   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1510   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1511   a->vecinuse = 0;
1512   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1513   PetscCall(VecResetArray(a->cvec));
1514   if (v) *v = NULL;
1515   PetscFunctionReturn(PETSC_SUCCESS);
1516 }
1517 
/* Hand out a view of rows [rbegin,rend) x columns [cbegin,cend) as a cached wrapper matrix.
   The wrapper (a->cmat) is created on first use and its layouts rebuilt only when the
   requested sizes change; the data itself is borrowed from the local sequential block. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend; /* requested row range clipped to this rank's local rows */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* Only one outstanding column vector or submatrix is allowed at a time */
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Intersect the global row range [rbegin,rend) with this rank's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* First request: build the cached wrapper with the requested layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* Full row range: share A's row layout instead of constructing a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* Reuse the cached wrapper; rebuild layouts only if the requested sizes changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* Same global row count but not the full matrix: all ranks must agree that the
         local splits also match (collective decision) before the layout can be kept */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* c->A must be NULL here; non-NULL means the previous submatrix was never restored */
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Borrow the corresponding submatrix of the local sequential block */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero marks the wrapper as checked out; 0 means free */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1575 
/* Return the wrapper matrix handed out by MatDenseGetSubMatrix_MPIDense() */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0; /* the cached wrapper is free for the next request */
  c           = (Mat_MPIDense *)a->cmat->data;
  /* Give the borrowed local-block submatrix back; c->A becomes NULL so the wrapper can be reused */
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask; /* pick up any host/device validity change made through the view */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1594 
1595 /*MC
1596    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1597 
1598    Options Database Key:
1599 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1600 
1601   Level: beginner
1602 
1603 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1604 M*/
/* Type constructor for MATMPIDENSE: allocates the per-matrix data structure, installs
   the operation table, and registers the type-specific implementations of the MatDense
   API, conversions, and matrix-product hooks via composed functions. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MPIDense function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* MatDense API: array/LDA access and column vector / submatrix borrowing */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* Conversions to/from other matrix types (device/external-package ones are optional) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* Matrix-product (MatProduct) hooks for AIJ-dense combinations */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1673 
1674 /*MC
1675    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1676 
1677    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1678    and `MATMPIDENSE` otherwise.
1679 
1680    Options Database Key:
1681 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1682 
1683   Level: beginner
1684 
1685 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1686 M*/
1687 
1688 /*@C
1689   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1690 
1691   Collective
1692 
1693   Input Parameters:
1694 + B    - the matrix
1695 - data - optional location of matrix data.  Set to `NULL` for PETSc
1696    to control all matrix memory allocation.
1697 
1698   Level: intermediate
1699 
1700   Notes:
1701   The dense format is fully compatible with standard Fortran
1702   storage by columns.
1703 
1704   The data input variable is intended primarily for Fortran programmers
1705   who wish to allocate their own matrix memory space.  Most users should
1706   set `data` to `NULL`.
1707 
1708 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1709 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation; PetscTryMethod makes this a
     no-op for matrix types that do not compose "MatMPIDenseSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1717 
1718 /*@
1719   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1720   array provided by the user. This is useful to avoid copying an array
1721   into a matrix
1722 
1723   Not Collective
1724 
1725   Input Parameters:
1726 + mat   - the matrix
1727 - array - the array in column major order
1728 
1729   Level: developer
1730 
1731   Note:
1732   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1733   freed when the matrix is destroyed.
1734 
1735 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1736           `MatDenseReplaceArray()`
1737 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation registered under "MatDensePlaceArray_C" */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* The matrix values have (potentially) changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the placed array is host memory, so the CPU copy is the valid one */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1749 
1750 /*@
1751   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1752 
1753   Not Collective
1754 
1755   Input Parameter:
1756 . mat - the matrix
1757 
1758   Level: developer
1759 
1760   Note:
1761   You can only call this after a call to `MatDensePlaceArray()`
1762 
1763 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1764 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation registered under "MatDenseResetArray_C" */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* The matrix values have (potentially) changed back to the original array */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1773 
1774 /*@
1775   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1776   array provided by the user. This is useful to avoid copying an array
1777   into a matrix
1778 
1779   Not Collective
1780 
1781   Input Parameters:
1782 + mat   - the matrix
1783 - array - the array in column major order
1784 
1785   Level: developer
1786 
1787   Note:
1788   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1789   freed by the user. It will be freed when the matrix is destroyed.
1790 
1791 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1792 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation registered under "MatDenseReplaceArray_C" */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* The matrix values have (potentially) changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the replacement array is host memory */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1804 
1805 /*@C
1806   MatCreateDense - Creates a matrix in `MATDENSE` format.
1807 
1808   Collective
1809 
1810   Input Parameters:
1811 + comm - MPI communicator
1812 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1813 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1814 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1815 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1816 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1817    to control all matrix memory allocation.
1818 
1819   Output Parameter:
1820 . A - the matrix
1821 
1822   Level: intermediate
1823 
1824   Notes:
1825   The dense format is fully compatible with standard Fortran
1826   storage by columns.
1827 
1828   Although local portions of the matrix are stored in column-major
1829   order, the matrix is partitioned across MPI ranks by row.
1830 
1831   The data input variable is intended primarily for Fortran programmers
1832   who wish to allocate their own matrix memory space.  Most users should
1833   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1834 
1835   The user MUST specify either the local or global matrix dimensions
1836   (possibly both).
1837 
1838 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1839 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* MATDENSE resolves to either the Seq or the MPI type depending on the communicator,
     so both preallocation routines are invoked; MatMPIDenseSetPreallocation() uses
     PetscTryMethod and is a no-op for the Seq type (presumably the Seq variant behaves
     likewise for the MPI type -- NOTE(review): confirm against its implementation) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1850 
1851 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1852 {
1853   Mat           mat;
1854   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1855 
1856   PetscFunctionBegin;
1857   *newmat = NULL;
1858   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1859   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1860   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1861   a = (Mat_MPIDense *)mat->data;
1862 
1863   mat->factortype   = A->factortype;
1864   mat->assembled    = PETSC_TRUE;
1865   mat->preallocated = PETSC_TRUE;
1866 
1867   mat->insertmode = NOT_SET_VALUES;
1868   a->donotstash   = oldmat->donotstash;
1869 
1870   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1871   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1872 
1873   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1874 
1875   *newmat = mat;
1876   PetscFunctionReturn(PETSC_SUCCESS);
1877 }
1878 
/* Load a parallel dense matrix from a viewer; supports binary viewers and,
   when PETSc is built with HDF5, HDF5 viewers. Any other viewer type errors. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  /* Dispatch on the detected viewer type */
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1904 
1905 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1906 {
1907   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1908   Mat           a, b;
1909 
1910   PetscFunctionBegin;
1911   a = matA->A;
1912   b = matB->A;
1913   PetscCall(MatEqual(a, b, flag));
1914   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1915   PetscFunctionReturn(PETSC_SUCCESS);
1916 }
1917 
1918 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1919 {
1920   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1921 
1922   PetscFunctionBegin;
1923   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1924   PetscCall(MatDestroy(&atb->atb));
1925   PetscCall(PetscFree(atb));
1926   PetscFunctionReturn(PETSC_SUCCESS);
1927 }
1928 
1929 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1930 {
1931   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1932 
1933   PetscFunctionBegin;
1934   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1935   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1936   PetscCall(PetscFree(abt));
1937   PetscFunctionReturn(PETSC_SUCCESS);
1938 }
1939 
/* Numeric phase of C = A^T * B for MPIDense A, B: each rank multiplies its local
   blocks, then the partial products are summed and distributed by row ownership
   of C via MPI_Reduce_scatter, using buffers set up in the symbolic phase. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  /* atb->atb caches the local product across calls: reused once it exists */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf */
  /* Pack per destination rank: for rank 'proc', its owned rows of every column,
     column by column -- this matches the recvcounts computed in the symbolic phase */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1984 
1985 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1986 {
1987   MPI_Comm               comm;
1988   PetscMPIInt            size;
1989   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1990   Mat_TransMatMultDense *atb;
1991   PetscBool              cisdense = PETSC_FALSE;
1992   PetscInt               i;
1993   const PetscInt        *ranges;
1994 
1995   PetscFunctionBegin;
1996   MatCheckProduct(C, 4);
1997   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
1998   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1999   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2000     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2001   }
2002 
2003   /* create matrix product C */
2004   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2005 #if defined(PETSC_HAVE_CUDA)
2006   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2007 #endif
2008 #if defined(PETSC_HAVE_HIP)
2009   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2010 #endif
2011   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2012   PetscCall(MatSetUp(C));
2013 
2014   /* create data structure for reuse C */
2015   PetscCallMPI(MPI_Comm_size(comm, &size));
2016   PetscCall(PetscNew(&atb));
2017   cM = C->rmap->N;
2018   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2019   PetscCall(MatGetOwnershipRanges(C, &ranges));
2020   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2021 
2022   C->product->data    = atb;
2023   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2024   PetscFunctionReturn(PETSC_SUCCESS);
2025 }
2026 
/* Symbolic phase of C = A * B^T for MPIDense A, B: sizes C and allocates the
   communication buffers for whichever algorithm ("allgatherv" or "cyclic")
   was selected on the product. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg; /* 0 = "allgatherv", 1 = "cyclic" (see switch below) */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* Reserve a fresh MPI tag for the point-to-point traffic of the cyclic algorithm */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* Two buffers sized for the largest row block of B, presumably so communication
       and computation can be double-buffered -- see the numeric routine */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0]: the local rows of B packed for sending; buf[1]: all rows of B gathered.
       recvdispls/recvcounts are in scalar entries (rows times A->cmap->N == B->cmap->N,
       equality guaranteed by the local-column check above) */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2078 
/*
  MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic - numeric phase of C = A * B^T for
  MPI dense matrices, using a ring ("cyclic") communication pattern.

  Each process packs its local rows of B (transposed, with the lda padding stripped) into a
  contiguous send buffer and passes it around the ring: at each of the 'size' steps the local
  block of A is multiplied via BLAS gemm ("N","T") against the B-block currently held,
  filling the column slab of the local C corresponding to the block's owner. The two
  buffers abt->buf[0]/abt->buf[1] (allocated in the symbolic phase) are used in ping-pong
  fashion so communication of the next block overlaps computation on the current one.

  Collective on the communicator of C.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense arrays, converted to BLAS integer width */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B array is already contiguous (no lda padding); send it directly */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact B's local bn x cK array into buf[0], dropping the lda padding */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send to rank-1, receive from rank+1 (mod size) */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the block currently in sendbuf originated on this rank */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* ping-pong between the two buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    /* writes the column slab of C owned-row-wise by 'recvisfrom'; skip gemm on empty dims */
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2161 
2162 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2163 {
2164   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2165   Mat_MatTransMultDense *abt;
2166   MPI_Comm               comm;
2167   PetscMPIInt            size;
2168   PetscScalar           *cv, *sendbuf, *recvbuf;
2169   const PetscScalar     *av, *bv;
2170   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2171   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2172   PetscBLASInt           cm, cn, ck, alda, clda;
2173 
2174   PetscFunctionBegin;
2175   MatCheckProduct(C, 3);
2176   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2177   abt = (Mat_MatTransMultDense *)C->product->data;
2178   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2179   PetscCallMPI(MPI_Comm_size(comm, &size));
2180   PetscCall(MatDenseGetArrayRead(a->A, &av));
2181   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2182   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2183   PetscCall(MatDenseGetLDA(a->A, &i));
2184   PetscCall(PetscBLASIntCast(i, &alda));
2185   PetscCall(MatDenseGetLDA(b->A, &blda));
2186   PetscCall(MatDenseGetLDA(c->A, &i));
2187   PetscCall(PetscBLASIntCast(i, &clda));
2188   /* copy transpose of B into buf[0] */
2189   bn      = B->rmap->n;
2190   sendbuf = abt->buf[0];
2191   recvbuf = abt->buf[1];
2192   for (k = 0, j = 0; j < bn; j++) {
2193     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2194   }
2195   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2196   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2197   PetscCall(PetscBLASIntCast(cK, &ck));
2198   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2199   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2200   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2201   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2202   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2203   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2204   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2205   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2206   PetscFunctionReturn(PETSC_SUCCESS);
2207 }
2208 
2209 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2210 {
2211   Mat_MatTransMultDense *abt;
2212 
2213   PetscFunctionBegin;
2214   MatCheckProduct(C, 3);
2215   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2216   abt = (Mat_MatTransMultDense *)C->product->data;
2217   switch (abt->alg) {
2218   case 1:
2219     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2220     break;
2221   default:
2222     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2223     break;
2224   }
2225   PetscFunctionReturn(PETSC_SUCCESS);
2226 }
2227 
2228 #if defined(PETSC_HAVE_ELEMENTAL)
2229 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2230 {
2231   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2232 
2233   PetscFunctionBegin;
2234   PetscCall(MatDestroy(&ab->Ce));
2235   PetscCall(MatDestroy(&ab->Ae));
2236   PetscCall(MatDestroy(&ab->Be));
2237   PetscCall(PetscFree(ab));
2238   PetscFunctionReturn(PETSC_SUCCESS);
2239 }
2240 
/*
  MatMatMultNumeric_MPIDense_MPIDense - numeric phase of C = A * B via Elemental.

  Re-converts A and B into the Elemental work matrices created by the symbolic phase
  (MAT_REUSE_MATRIX), performs the product with Elemental, and converts the Elemental
  result back into the MPI dense matrix C in place.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2255 
2256 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2257 {
2258   Mat               Ae, Be, Ce;
2259   Mat_MatMultDense *ab;
2260 
2261   PetscFunctionBegin;
2262   MatCheckProduct(C, 4);
2263   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2264   /* check local size of A and B */
2265   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2266     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2267   }
2268 
2269   /* create elemental matrices Ae and Be */
2270   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2271   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2272   PetscCall(MatSetType(Ae, MATELEMENTAL));
2273   PetscCall(MatSetUp(Ae));
2274   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2275 
2276   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2277   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2278   PetscCall(MatSetType(Be, MATELEMENTAL));
2279   PetscCall(MatSetUp(Be));
2280   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2281 
2282   /* compute symbolic Ce = Ae*Be */
2283   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2284   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2285 
2286   /* setup C */
2287   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2288   PetscCall(MatSetType(C, MATDENSE));
2289   PetscCall(MatSetUp(C));
2290 
2291   /* create data structure for reuse Cdense */
2292   PetscCall(PetscNew(&ab));
2293   ab->Ae = Ae;
2294   ab->Be = Be;
2295   ab->Ce = Ce;
2296 
2297   C->product->data       = ab;
2298   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2299   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2300   PetscFunctionReturn(PETSC_SUCCESS);
2301 }
2302 #endif
2303 
2304 #if defined(PETSC_HAVE_ELEMENTAL)
2305 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2306 {
2307   PetscFunctionBegin;
2308   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2309   C->ops->productsymbolic = MatProductSymbolic_AB;
2310   PetscFunctionReturn(PETSC_SUCCESS);
2311 }
2312 #endif
2313 
2314 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2315 {
2316   Mat_Product *product = C->product;
2317   Mat          A = product->A, B = product->B;
2318 
2319   PetscFunctionBegin;
2320   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2321     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2322   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2323   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2324   PetscFunctionReturn(PETSC_SUCCESS);
2325 }
2326 
/*
  MatProductSetFromOptions_MPIDense_ABt - install the ABt (C = A * B^T) product callbacks
  and resolve the algorithm choice ("allgatherv", the default, or "cyclic") from the
  product settings and the command line.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name depends on whether the product was requested
     through the old MatMatTransposeMult() API or the MatProduct interface */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  /* only override the algorithm if the user actually supplied the option */
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2356 
2357 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2358 {
2359   Mat_Product *product = C->product;
2360 
2361   PetscFunctionBegin;
2362   switch (product->type) {
2363 #if defined(PETSC_HAVE_ELEMENTAL)
2364   case MATPRODUCT_AB:
2365     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2366     break;
2367 #endif
2368   case MATPRODUCT_AtB:
2369     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2370     break;
2371   case MATPRODUCT_ABt:
2372     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2373     break;
2374   default:
2375     break;
2376   }
2377   PetscFunctionReturn(PETSC_SUCCESS);
2378 }
2379