xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 92bec4eefde5b79327e7cea3b0266e7580ec8183)
1 /*
2    Basic functions for parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
13   matrix that represents the locally owned portion of the operator. For a sequential matrix it returns the matrix itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
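  Example Usage:
  A minimal sketch; here `A` is assumed to be an already created `MATMPIDENSE` matrix. Since no
  new reference is created, the returned matrix must not be destroyed by the caller.
.vb
  Mat      Aloc;
  PetscInt lda;

  PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
  PetscCall(MatDenseGetLDA(Aloc, &lda)); /* query the local storage directly */
.ve
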
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
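  /* add alpha to the locally owned diagonal entries: global entry (j,j) sits at local row j - rstart of
     column j, i.e. at offset j - rstart + j * lda in the column-major local storage; for rectangular
     matrices the diagonal may end before rend */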
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
95 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
96 {
97   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
98   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
99   PetscScalar  *array;
100   MPI_Comm      comm;
101   PetscBool     flg;
102   Mat           B;
103 
104   PetscFunctionBegin;
105   PetscCall(MatHasCongruentLayouts(A, &flg));
106   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices with congruent row and column layouts are supported");
107   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
108   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
109 #if PetscDefined(HAVE_CUDA)
110     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
111     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
112 #elif PetscDefined(HAVE_HIP)
113     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
114     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
115 #endif
116     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
117     PetscCall(MatCreate(comm, &B));
118     PetscCall(MatSetSizes(B, m, m, m, m));
119     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
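    /* make B alias the locally owned diagonal block of A's storage (no copy); the offset m * rstart below
       assumes the local leading dimension equals m */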
120     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
121     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
122     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
123     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
124     *a = B;
125     PetscCall(MatDestroy(&B));
126   } else *a = B;
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
131 {
132   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
133   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
134   PetscBool     roworiented = A->roworiented;
135 
136   PetscFunctionBegin;
137   for (i = 0; i < m; i++) {
138     if (idxm[i] < 0) continue;
139     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
140     if (idxm[i] >= rstart && idxm[i] < rend) {
141       row = idxm[i] - rstart;
142       if (roworiented) {
143         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
144       } else {
145         for (j = 0; j < n; j++) {
146           if (idxn[j] < 0) continue;
147           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
148           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
149         }
150       }
151     } else if (!A->donotstash) {
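      /* off-process entry: stash it and communicate it to the owning rank during MatAssemblyBegin()/MatAssemblyEnd() */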
152       mat->assembled = PETSC_FALSE;
153       if (roworiented) {
154         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
155       } else {
156         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
157       }
158     }
159   }
160   PetscFunctionReturn(PETSC_SUCCESS);
161 }
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
194 {
195   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
196   MatType       mtype = MATSEQDENSE;
197 
198   PetscFunctionBegin;
199   if (!a->A) {
200     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
201     PetscCall(PetscLayoutSetUp(A->rmap));
202     PetscCall(PetscLayoutSetUp(A->cmap));
203     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
204     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
205 #if PetscDefined(HAVE_CUDA)
206     PetscBool iscuda;
207     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
208     if (iscuda) mtype = MATSEQDENSECUDA;
209 #elif PetscDefined(HAVE_HIP)
210     PetscBool iship;
211     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
212     if (iship) mtype = MATSEQDENSEHIP;
213 #endif
214     PetscCall(MatSetType(a->A, mtype));
215   }
216   PetscCall(MatDenseSetLDA(a->A, lda));
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
283 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
284 {
285   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
286   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
287   const PetscInt    *irow, *icol;
288   const PetscScalar *v;
289   PetscScalar       *bv;
290   Mat                newmat;
291   IS                 iscol_local;
292   MPI_Comm           comm_is, comm_mat;
293 
294   PetscFunctionBegin;
295   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
296   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
297   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
298 
299   PetscCall(ISAllGather(iscol, &iscol_local));
300   PetscCall(ISGetIndices(isrow, &irow));
301   PetscCall(ISGetIndices(iscol_local, &icol));
302   PetscCall(ISGetLocalSize(isrow, &nrows));
303   PetscCall(ISGetLocalSize(iscol, &ncols));
304   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
305 
306   /* No parallel redistribution is currently supported; the submatrix must use the same row
307      partitioning as the original matrix. Each index set should really be checked to confirm
308      that this holds. */
309 
310   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
311   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
312 
313   /* Check submatrix call */
314   if (scall == MAT_REUSE_MATRIX) {
315     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
316     /* Really need to check the row and column sizes of the reused matrix! */
317     newmat = *B;
318   } else {
319     /* Create and fill new matrix */
320     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
321     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
322     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
323     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
324   }
325 
326   /* Now extract the data pointers and do the copy, column at a time */
327   newmatd = (Mat_MPIDense *)newmat->data;
328   PetscCall(MatDenseGetArray(newmatd->A, &bv));
329   PetscCall(MatDenseGetArrayRead(mat->A, &v));
330   PetscCall(MatDenseGetLDA(mat->A, &lda));
331   for (i = 0; i < Ncols; i++) {
332     const PetscScalar *av = v + lda * icol[i];
333     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
334   }
335   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
336   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
337 
338   /* Assemble the matrices so that the correct flags are set */
339   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
340   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
341 
342   /* Free work space */
343   PetscCall(ISRestoreIndices(isrow, &irow));
344   PetscCall(ISRestoreIndices(iscol_local, &icol));
345   PetscCall(ISDestroy(&iscol_local));
346   *B = newmat;
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
392 {
393   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
394   PetscInt      i, *row, *col, flg, j, rstart, ncols;
395   PetscMPIInt   n;
396   PetscScalar  *val;
397 
398   PetscFunctionBegin;
399   if (!mdn->donotstash && !mat->nooffprocentries) {
400     /*  wait on receives */
401     while (1) {
402       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
403       if (!flg) break;
404 
405       for (i = 0; i < n;) {
406         /* Now identify the consecutive values belonging to the same row */
407         for (j = i, rstart = row[j]; j < n; j++) {
408           if (row[j] != rstart) break;
409         }
410         if (j < n) ncols = j - i;
411         else ncols = n - i;
412         /* Now assemble all these values with a single function call */
413         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
414         i = j;
415       }
416     }
417     PetscCall(MatStashScatterEnd_Private(&mat->stash));
418   }
419 
420   PetscCall(MatAssemblyBegin(mdn->A, mode));
421   PetscCall(MatAssemblyEnd(mdn->A, mode));
422   PetscFunctionReturn(PETSC_SUCCESS);
423 }
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
434 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
435 {
436   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
437   PetscInt      i, len, *lrows;
438 
439   PetscFunctionBegin;
440   /* get locally owned rows */
441   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
442   /* fix right hand side if needed */
443   if (x && b) {
444     const PetscScalar *xx;
445     PetscScalar       *bb;
446 
447     PetscCall(VecGetArrayRead(x, &xx));
448     PetscCall(VecGetArrayWrite(b, &bb));
449     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
450     PetscCall(VecRestoreArrayRead(x, &xx));
451     PetscCall(VecRestoreArrayWrite(b, &bb));
452   }
453   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
454   if (diag != 0.0) {
455     Vec d;
456 
457     PetscCall(MatCreateVecs(A, NULL, &d));
458     PetscCall(VecSet(d, diag));
459     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
460     PetscCall(VecDestroy(&d));
461   }
462   PetscCall(PetscFree(lrows));
463   PetscFunctionReturn(PETSC_SUCCESS);
464 }
465 
466 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
470 
471 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
472 {
473   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
474   const PetscScalar *ax;
475   PetscScalar       *ay;
476   PetscMemType       axmtype, aymtype;
477 
478   PetscFunctionBegin;
479   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
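  /* gather the full input vector into lvec with the PetscSF broadcast, then apply the local dense block
     (local rows by global columns) */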
480   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
481   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
482   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
483   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
484   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
485   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
486   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
487   PetscFunctionReturn(PETSC_SUCCESS);
488 }
489 
490 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
491 {
492   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
493   const PetscScalar *ax;
494   PetscScalar       *ay;
495   PetscMemType       axmtype, aymtype;
496 
497   PetscFunctionBegin;
498   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
499   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
500   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
501   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
502   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
503   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
504   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
505   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
510 {
511   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
512   const PetscScalar *ax;
513   PetscScalar       *ay;
514   PetscMemType       axmtype, aymtype;
515 
516   PetscFunctionBegin;
517   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
518   PetscCall(VecSet(yy, 0.0));
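  /* apply the local transpose into lvec (length equal to the global column count), then sum the
     contributions from all ranks into yy with the PetscSF reduce */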
519   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
520   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
521   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
522   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
523   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
524   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
525   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
530 {
531   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
532   const PetscScalar *ax;
533   PetscScalar       *ay;
534   PetscMemType       axmtype, aymtype;
535 
536   PetscFunctionBegin;
537   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
538   PetscCall(VecCopy(yy, zz));
539   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
540   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
541   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
542   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
543   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
544   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
545   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
550 {
551   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
551   PetscInt           lda, len, i, nl, ng, radd;
553   PetscScalar       *x;
554   const PetscScalar *av;
555 
556   PetscFunctionBegin;
557   PetscCall(VecGetArray(v, &x));
558   PetscCall(VecGetSize(v, &ng));
559   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
560   PetscCall(VecGetLocalSize(v, &nl));
561   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
562   PetscCall(MatDenseGetArrayRead(a->A, &av));
563   PetscCall(MatDenseGetLDA(a->A, &lda));
564   radd = A->rmap->rstart * lda; /* offset of column rstart in the column-major local storage */
565   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
566   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
567   PetscCall(PetscArrayzero(x + i, nl - i));
568   PetscCall(VecRestoreArray(v, &x));
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
572 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
573 {
574   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
575 
576   PetscFunctionBegin;
577   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
578   PetscCall(MatStashDestroy_Private(&mat->stash));
579   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
580   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
581   PetscCall(MatDestroy(&mdn->A));
582   PetscCall(VecDestroy(&mdn->lvec));
583   PetscCall(PetscSFDestroy(&mdn->Mvctx));
584   PetscCall(VecDestroy(&mdn->cvec));
585   PetscCall(MatDestroy(&mdn->cmat));
586 
587   PetscCall(PetscFree(mat->data));
588   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
589 
590   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
591   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
592   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
593   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
594   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
596   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
597   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
598   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
599   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
600   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
601   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
602   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
603 #if defined(PETSC_HAVE_ELEMENTAL)
604   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
605 #endif
606 #if defined(PETSC_HAVE_SCALAPACK)
607   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
608 #endif
609   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
610   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
611   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
612 #if defined(PETSC_HAVE_CUDA)
613   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
614   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
615   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
616   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
617   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
618   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
619   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
620   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
621   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
622   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
623   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
624   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
625   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
626   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
627   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
628   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
629   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
630   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
631 #endif
632 #if defined(PETSC_HAVE_HIP)
633   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
634   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
635   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
636   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
637   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
638   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
639   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
640   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
641   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
642   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
643   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
644   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
645   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
646   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
647   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
648   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
649   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
650   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
651 #endif
652   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
662 
663   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664   PetscFunctionReturn(PETSC_SUCCESS);
665 }
666 
667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
668 
669 #include <petscdraw.h>
670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671 {
672   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
673   PetscMPIInt       rank;
674   PetscViewerType   vtype;
675   PetscBool         iascii, isdraw;
676   PetscViewer       sviewer;
677   PetscViewerFormat format;
678 
679   PetscFunctionBegin;
680   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683   if (iascii) {
684     PetscCall(PetscViewerGetType(viewer, &vtype));
685     PetscCall(PetscViewerGetFormat(viewer, &format));
686     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687       MatInfo info;
688       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691                                                    (PetscInt)info.memory));
692       PetscCall(PetscViewerFlush(viewer));
693       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695       PetscFunctionReturn(PETSC_SUCCESS);
696     } else if (format == PETSC_VIEWER_ASCII_INFO) {
697       PetscFunctionReturn(PETSC_SUCCESS);
698     }
699   } else if (isdraw) {
700     PetscDraw draw;
701     PetscBool isnull;
702 
703     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704     PetscCall(PetscDrawIsNull(draw, &isnull));
705     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706   }
707 
708   {
709     /* assemble the entire matrix onto the first process */
710     Mat          A;
711     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712     PetscInt    *cols;
713     PetscScalar *vals;
714 
715     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716     if (rank == 0) {
717       PetscCall(MatSetSizes(A, M, N, M, N));
718     } else {
719       PetscCall(MatSetSizes(A, 0, 0, M, N));
720     }
721     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722     PetscCall(MatSetType(A, MATMPIDENSE));
723     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
724 
725     /* Copy the matrix row by row ... This is not the most efficient approach,
726        but it is simple and adequate here */
727     A->insertmode = INSERT_VALUES;
728 
729     row = mat->rmap->rstart;
730     m   = mdn->A->rmap->n;
731     for (i = 0; i < m; i++) {
732       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735       row++;
736     }
737 
738     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741     if (rank == 0) {
742       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744     }
745     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746     PetscCall(PetscViewerFlush(viewer));
747     PetscCall(MatDestroy(&A));
748   }
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753 {
754   PetscBool iascii, isbinary, isdraw, issocket;
755 
756   PetscFunctionBegin;
757   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
761 
762   if (iascii || issocket || isdraw) {
763     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765   PetscFunctionReturn(PETSC_SUCCESS);
766 }
767 
768 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769 {
770   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
771   Mat            mdn = mat->A;
772   PetscLogDouble isend[5], irecv[5];
773 
774   PetscFunctionBegin;
775   info->block_size = 1.0;
776 
777   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
778 
779   isend[0] = info->nz_used;
780   isend[1] = info->nz_allocated;
781   isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;
783   isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   } else if (flag == MAT_GLOBAL_SUM) {
799     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
800 
801     info->nz_used      = irecv[0];
802     info->nz_allocated = irecv[1];
803     info->nz_unneeded  = irecv[2];
804     info->memory       = irecv[3];
805     info->mallocs      = irecv[4];
806   }
807   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
808   info->fill_ratio_needed = 0;
809   info->factor_mallocs    = 0;
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
813 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814 {
815   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     MatCheckPreallocated(A, 1);
823     PetscCall(MatSetOption(a->A, op, flg));
824     break;
825   case MAT_ROW_ORIENTED:
826     MatCheckPreallocated(A, 1);
827     a->roworiented = flg;
828     PetscCall(MatSetOption(a->A, op, flg));
829     break;
830   case MAT_FORCE_DIAGONAL_ENTRIES:
831   case MAT_KEEP_NONZERO_PATTERN:
832   case MAT_USE_HASH_TABLE:
833   case MAT_SORTED_FULL:
834     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835     break;
836   case MAT_IGNORE_OFF_PROC_ENTRIES:
837     a->donotstash = flg;
838     break;
839   case MAT_SYMMETRIC:
840   case MAT_STRUCTURALLY_SYMMETRIC:
841   case MAT_HERMITIAN:
842   case MAT_SYMMETRY_ETERNAL:
843   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844   case MAT_SPD:
845   case MAT_IGNORE_LOWER_TRIANGULAR:
846   case MAT_IGNORE_ZERO_ENTRIES:
847   case MAT_SPD_ETERNAL:
848     /* if the local diagonal block is square it inherits some of the properties above */
849     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850     break;
851   default:
852     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853   }
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858 {
859   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
860   const PetscScalar *l;
861   PetscScalar        x, *v, *vv, *r;
862   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
863 
864   PetscFunctionBegin;
865   PetscCall(MatDenseGetArray(mdn->A, &vv));
866   PetscCall(MatDenseGetLDA(mdn->A, &lda));
867   PetscCall(MatGetLocalSize(A, &s2, &s3));
868   if (ll) {
869     PetscCall(VecGetLocalSize(ll, &s2a));
870     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871     PetscCall(VecGetArrayRead(ll, &l));
872     for (i = 0; i < m; i++) {
873       x = l[i];
874       v = vv + i;
875       for (j = 0; j < n; j++) {
876         (*v) *= x;
877         v += lda;
878       }
879     }
880     PetscCall(VecRestoreArrayRead(ll, &l));
881     PetscCall(PetscLogFlops(1.0 * n * m));
882   }
883   if (rr) {
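    /* every column of the parallel matrix is stored locally, so the distributed right scaling vector
       must first be gathered into lvec */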
884     const PetscScalar *ar;
885 
886     PetscCall(VecGetLocalSize(rr, &s3a));
887     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
888     PetscCall(VecGetArrayRead(rr, &ar));
889     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890     PetscCall(VecGetArray(mdn->lvec, &r));
891     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893     PetscCall(VecRestoreArrayRead(rr, &ar));
894     for (i = 0; i < n; i++) {
895       x = r[i];
896       v = vv + i * lda;
897       for (j = 0; j < m; j++) (*v++) *= x;
898     }
899     PetscCall(VecRestoreArray(mdn->lvec, &r));
900     PetscCall(PetscLogFlops(1.0 * n * m));
901   }
902   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903   PetscFunctionReturn(PETSC_SUCCESS);
904 }
905 
906 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907 {
908   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
909   PetscInt           i, j;
910   PetscMPIInt        size;
911   PetscReal          sum = 0.0;
912   const PetscScalar *av, *v;
913 
914   PetscFunctionBegin;
915   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916   v = av;
917   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918   if (size == 1) {
919     PetscCall(MatNorm(mdn->A, type, nrm));
920   } else {
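    /* NOTE: the NORM_FROBENIUS and NORM_1 branches below assume the local block is stored contiguously,
       i.e. the leading dimension equals the local row count */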
921     if (type == NORM_FROBENIUS) {
922       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923         sum += PetscRealPart(PetscConj(*v) * (*v));
924         v++;
925       }
926       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927       *nrm = PetscSqrtReal(*nrm);
928       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929     } else if (type == NORM_1) {
930       PetscReal *tmp, *tmp2;
931       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932       *nrm = 0.0;
933       v    = av;
934       for (j = 0; j < mdn->A->cmap->n; j++) {
935         for (i = 0; i < mdn->A->rmap->n; i++) {
936           tmp[j] += PetscAbsScalar(*v);
937           v++;
938         }
939       }
940       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941       for (j = 0; j < A->cmap->N; j++) {
942         if (tmp2[j] > *nrm) *nrm = tmp2[j];
943       }
944       PetscCall(PetscFree2(tmp, tmp2));
945       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946     } else if (type == NORM_INFINITY) { /* max row norm */
947       PetscReal ntemp;
948       PetscCall(MatNorm(mdn->A, type, &ntemp));
949       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951   }
952   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953   PetscFunctionReturn(PETSC_SUCCESS);
954 }
955 
956 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957 {
958   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959   Mat           B;
960   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961   PetscInt      j, i, lda;
962   PetscScalar  *v;
963 
964   PetscFunctionBegin;
965   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971   } else B = *matout;
972 
973   m = a->A->rmap->n;
974   n = a->A->cmap->n;
975   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976   PetscCall(MatDenseGetLDA(a->A, &lda));
977   PetscCall(PetscMalloc1(m, &rwork));
978   for (i = 0; i < m; i++) rwork[i] = rstart + i;
979   for (j = 0; j < n; j++) {
980     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981     v += lda;
982   }
983   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984   PetscCall(PetscFree(rwork));
985   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988     *matout = B;
989   } else {
990     PetscCall(MatHeaderMerge(A, &B));
991   }
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
997 
998 static PetscErrorCode MatSetUp_MPIDense(Mat A)
999 {
1000   PetscFunctionBegin;
1001   PetscCall(PetscLayoutSetUp(A->rmap));
1002   PetscCall(PetscLayoutSetUp(A->cmap));
1003   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008 {
1009   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1010 
1011   PetscFunctionBegin;
1012   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017 {
1018   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1019 
1020   PetscFunctionBegin;
1021   PetscCall(MatConjugate(a->A));
1022   PetscFunctionReturn(PETSC_SUCCESS);
1023 }
1024 
1025 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1026 {
1027   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(MatRealPart(a->A));
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033 
1034 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035 {
1036   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(MatImaginaryPart(a->A));
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042 
1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044 {
1045   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1046 
1047   PetscFunctionBegin;
1048   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051   PetscFunctionReturn(PETSC_SUCCESS);
1052 }
1053 
1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1055 
1056 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057 {
1058   PetscInt      i, m, n;
1059   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060   PetscReal    *work;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatGetSize(A, &m, &n));
1064   PetscCall(PetscMalloc1(n, &work));
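  /* compute the local column reductions (the mean reductions are computed as sums here), combine them
     across ranks (MAX for NORM_INFINITY, SUM otherwise), then post-process: take square roots for NORM_2
     and divide by the global row count for the means */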
1065   if (type == REDUCTION_MEAN_REALPART) {
1066     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069   } else {
1070     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071   }
1072   if (type == NORM_2) {
1073     for (i = 0; i < n; i++) work[i] *= work[i];
1074   }
1075   if (type == NORM_INFINITY) {
1076     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077   } else {
1078     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079   }
1080   PetscCall(PetscFree(work));
1081   if (type == NORM_2) {
1082     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084     for (i = 0; i < n; i++) reductions[i] /= m;
1085   }
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1090 {
1091   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatSetRandom(d->A, rctx));
1095 #if defined(PETSC_HAVE_DEVICE)
1096   x->offloadmask = d->A->offloadmask;
1097 #endif
1098   PetscFunctionReturn(PETSC_SUCCESS);
1099 }
1100 
1101 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1102 {
1103   PetscFunctionBegin;
1104   *missing = PETSC_FALSE;
1105   PetscFunctionReturn(PETSC_SUCCESS);
1106 }
1107 
1108 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1109 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1110 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1111 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1112 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1113 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1114 
1115 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1116                                        MatGetRow_MPIDense,
1117                                        MatRestoreRow_MPIDense,
1118                                        MatMult_MPIDense,
1119                                        /*  4*/ MatMultAdd_MPIDense,
1120                                        MatMultTranspose_MPIDense,
1121                                        MatMultTransposeAdd_MPIDense,
1122                                        NULL,
1123                                        NULL,
1124                                        NULL,
1125                                        /* 10*/ NULL,
1126                                        NULL,
1127                                        NULL,
1128                                        NULL,
1129                                        MatTranspose_MPIDense,
1130                                        /* 15*/ MatGetInfo_MPIDense,
1131                                        MatEqual_MPIDense,
1132                                        MatGetDiagonal_MPIDense,
1133                                        MatDiagonalScale_MPIDense,
1134                                        MatNorm_MPIDense,
1135                                        /* 20*/ MatAssemblyBegin_MPIDense,
1136                                        MatAssemblyEnd_MPIDense,
1137                                        MatSetOption_MPIDense,
1138                                        MatZeroEntries_MPIDense,
1139                                        /* 24*/ MatZeroRows_MPIDense,
1140                                        NULL,
1141                                        NULL,
1142                                        NULL,
1143                                        NULL,
1144                                        /* 29*/ MatSetUp_MPIDense,
1145                                        NULL,
1146                                        NULL,
1147                                        MatGetDiagonalBlock_MPIDense,
1148                                        NULL,
1149                                        /* 34*/ MatDuplicate_MPIDense,
1150                                        NULL,
1151                                        NULL,
1152                                        NULL,
1153                                        NULL,
1154                                        /* 39*/ MatAXPY_MPIDense,
1155                                        MatCreateSubMatrices_MPIDense,
1156                                        NULL,
1157                                        MatGetValues_MPIDense,
1158                                        MatCopy_MPIDense,
1159                                        /* 44*/ NULL,
1160                                        MatScale_MPIDense,
1161                                        MatShift_MPIDense,
1162                                        NULL,
1163                                        NULL,
1164                                        /* 49*/ MatSetRandom_MPIDense,
1165                                        NULL,
1166                                        NULL,
1167                                        NULL,
1168                                        NULL,
1169                                        /* 54*/ NULL,
1170                                        NULL,
1171                                        NULL,
1172                                        NULL,
1173                                        NULL,
1174                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1175                                        MatDestroy_MPIDense,
1176                                        MatView_MPIDense,
1177                                        NULL,
1178                                        NULL,
1179                                        /* 64*/ NULL,
1180                                        NULL,
1181                                        NULL,
1182                                        NULL,
1183                                        NULL,
1184                                        /* 69*/ NULL,
1185                                        NULL,
1186                                        NULL,
1187                                        NULL,
1188                                        NULL,
1189                                        /* 74*/ NULL,
1190                                        NULL,
1191                                        NULL,
1192                                        NULL,
1193                                        NULL,
1194                                        /* 79*/ NULL,
1195                                        NULL,
1196                                        NULL,
1197                                        NULL,
1198                                        /* 83*/ MatLoad_MPIDense,
1199                                        NULL,
1200                                        NULL,
1201                                        NULL,
1202                                        NULL,
1203                                        NULL,
1204                                        /* 89*/ NULL,
1205                                        NULL,
1206                                        NULL,
1207                                        NULL,
1208                                        NULL,
1209                                        /* 94*/ NULL,
1210                                        NULL,
1211                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1212                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1213                                        NULL,
1214                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1215                                        NULL,
1216                                        NULL,
1217                                        MatConjugate_MPIDense,
1218                                        NULL,
1219                                        /*104*/ NULL,
1220                                        MatRealPart_MPIDense,
1221                                        MatImaginaryPart_MPIDense,
1222                                        NULL,
1223                                        NULL,
1224                                        /*109*/ NULL,
1225                                        NULL,
1226                                        NULL,
1227                                        MatGetColumnVector_MPIDense,
1228                                        MatMissingDiagonal_MPIDense,
1229                                        /*114*/ NULL,
1230                                        NULL,
1231                                        NULL,
1232                                        NULL,
1233                                        NULL,
1234                                        /*119*/ NULL,
1235                                        NULL,
1236                                        NULL,
1237                                        NULL,
1238                                        NULL,
1239                                        /*124*/ NULL,
1240                                        MatGetColumnReductions_MPIDense,
1241                                        NULL,
1242                                        NULL,
1243                                        NULL,
1244                                        /*129*/ NULL,
1245                                        NULL,
1246                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1247                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1248                                        NULL,
1249                                        /*134*/ NULL,
1250                                        NULL,
1251                                        NULL,
1252                                        NULL,
1253                                        NULL,
1254                                        /*139*/ NULL,
1255                                        NULL,
1256                                        NULL,
1257                                        NULL,
1258                                        NULL,
1259                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1260                                        /*145*/ NULL,
1261                                        NULL,
1262                                        NULL,
1263                                        NULL,
1264                                        NULL,
1265                                        /*150*/ NULL,
1266                                        NULL};
1267 
1268 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1269 {
1270   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1271   MatType       mtype = MATSEQDENSE;
1272 
1273   PetscFunctionBegin;
1274   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1275   PetscCall(PetscLayoutSetUp(mat->rmap));
1276   PetscCall(PetscLayoutSetUp(mat->cmap));
1277   if (!a->A) {
1278     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1279     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1280   }
1281 #if defined(PETSC_HAVE_CUDA)
1282   PetscBool iscuda;
1283   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1284   if (iscuda) mtype = MATSEQDENSECUDA;
1285 #endif
1286 #if defined(PETSC_HAVE_HIP)
1287   PetscBool iship;
1288   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1289   if (iship) mtype = MATSEQDENSEHIP;
1290 #endif
1291   PetscCall(MatSetType(a->A, mtype));
1292   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1293 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1294   mat->offloadmask = a->A->offloadmask;
1295 #endif
1296   mat->preallocated = PETSC_TRUE;
1297   mat->assembled    = PETSC_TRUE;
1298   PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300 
1301 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1302 {
1303   Mat B, C;
1304 
1305   PetscFunctionBegin;
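  /* Merge each rank's diagonal and off-diagonal AIJ blocks into one sequential AIJ matrix, convert
     that to sequential dense, and then stack the per-rank dense blocks into a parallel MATMPIDENSE */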
1306   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1307   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1308   PetscCall(MatDestroy(&C));
1309   if (reuse == MAT_REUSE_MATRIX) {
1310     C = *newmat;
1311   } else C = NULL;
1312   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1313   PetscCall(MatDestroy(&B));
1314   if (reuse == MAT_INPLACE_MATRIX) {
1315     PetscCall(MatHeaderReplace(A, &C));
1316   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1317   PetscFunctionReturn(PETSC_SUCCESS);
1318 }
1319 
1320 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1321 {
1322   Mat B, C;
1323 
1324   PetscFunctionBegin;
1325   PetscCall(MatDenseGetLocalMatrix(A, &C));
1326   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1327   if (reuse == MAT_REUSE_MATRIX) {
1328     C = *newmat;
1329   } else C = NULL;
1330   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1331   PetscCall(MatDestroy(&B));
1332   if (reuse == MAT_INPLACE_MATRIX) {
1333     PetscCall(MatHeaderReplace(A, &C));
1334   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1335   PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337 
1338 #if defined(PETSC_HAVE_ELEMENTAL)
1339 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1340 {
1341   Mat          mat_elemental;
1342   PetscScalar *v;
1343   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1344 
1345   PetscFunctionBegin;
1346   if (reuse == MAT_REUSE_MATRIX) {
1347     mat_elemental = *newmat;
1348     PetscCall(MatZeroEntries(*newmat));
1349   } else {
1350     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1351     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1352     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1353     PetscCall(MatSetUp(mat_elemental));
1354     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1355   }
1356 
1357   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1358   for (i = 0; i < N; i++) cols[i] = i;
1359   for (i = 0; i < m; i++) rows[i] = rstart + i;
1360 
1361   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1362   PetscCall(MatDenseGetArray(A, &v));
1363   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1364   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1365   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1366   PetscCall(MatDenseRestoreArray(A, &v));
1367   PetscCall(PetscFree2(rows, cols));
1368 
1369   if (reuse == MAT_INPLACE_MATRIX) {
1370     PetscCall(MatHeaderReplace(A, &mat_elemental));
1371   } else {
1372     *newmat = mat_elemental;
1373   }
1374   PetscFunctionReturn(PETSC_SUCCESS);
1375 }
1376 #endif
1377 
1378 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1379 {
1380   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1381 
1382   PetscFunctionBegin;
1383   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1388 {
1389   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
1396 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1397 {
1398   Mat_MPIDense *mat;
1399   PetscInt      m, nloc, N;
1400 
1401   PetscFunctionBegin;
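  /* Stack the sequential dense matrices owned by the ranks of comm on top of each other (by rows) into a
     single parallel MATMPIDENSE; the symbolic phase builds the layout, the numeric phase copies the values */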
1402   PetscCall(MatGetSize(inmat, &m, &N));
1403   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1404   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1405     PetscInt sum;
1406 
1407     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1408     /* Check sum(n) = N */
1409     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1410     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1411 
1412     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1413     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1414   }
1415 
1416   /* numeric phase */
1417   mat = (Mat_MPIDense *)(*outmat)->data;
1418   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1419   PetscFunctionReturn(PETSC_SUCCESS);
1420 }
1421 
1422 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1423 {
1424   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1425   PetscInt      lda;
1426 
1427   PetscFunctionBegin;
1428   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1429   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1430   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1431   a->vecinuse = col + 1;
1432   PetscCall(MatDenseGetLDA(a->A, &lda));
1433   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1434   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1435   *v = a->cvec;
1436   PetscFunctionReturn(PETSC_SUCCESS);
1437 }
1438 
1439 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1440 {
1441   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1442 
1443   PetscFunctionBegin;
1444   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1445   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1446   a->vecinuse = 0;
1447   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1448   PetscCall(VecResetArray(a->cvec));
1449   if (v) *v = NULL;
1450   PetscFunctionReturn(PETSC_SUCCESS);
1451 }
1452 
1453 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1454 {
1455   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1456   PetscInt      lda;
1457 
1458   PetscFunctionBegin;
1459   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1460   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1461   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1462   a->vecinuse = col + 1;
1463   PetscCall(MatDenseGetLDA(a->A, &lda));
1464   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1465   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1466   PetscCall(VecLockReadPush(a->cvec));
1467   *v = a->cvec;
1468   PetscFunctionReturn(PETSC_SUCCESS);
1469 }
1470 
1471 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1472 {
1473   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1474 
1475   PetscFunctionBegin;
1476   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1477   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1478   a->vecinuse = 0;
1479   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1480   PetscCall(VecLockReadPop(a->cvec));
1481   PetscCall(VecResetArray(a->cvec));
1482   if (v) *v = NULL;
1483   PetscFunctionReturn(PETSC_SUCCESS);
1484 }
1485 
1486 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1487 {
1488   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1489   PetscInt      lda;
1490 
1491   PetscFunctionBegin;
1492   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1493   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1494   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1495   a->vecinuse = col + 1;
1496   PetscCall(MatDenseGetLDA(a->A, &lda));
1497   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1498   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1499   *v = a->cvec;
1500   PetscFunctionReturn(PETSC_SUCCESS);
1501 }
1502 
1503 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1504 {
1505   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1506 
1507   PetscFunctionBegin;
1508   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1509   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1510   a->vecinuse = 0;
1511   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1512   PetscCall(VecResetArray(a->cvec));
1513   if (v) *v = NULL;
1514   PetscFunctionReturn(PETSC_SUCCESS);
1515 }
1516 
1517 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1518 {
1519   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1520   Mat_MPIDense *c;
1521   MPI_Comm      comm;
1522   PetscInt      pbegin, pend;
1523 
1524   PetscFunctionBegin;
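  /* The requested block is exposed through the cached parallel wrapper a->cmat, whose local part is a
     MatDenseGetSubMatrix() view of the local matrix a->A; only one such view may be outstanding at a time */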
1525   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1526   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1527   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1528   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1529   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1530   if (!a->cmat) {
1531     PetscCall(MatCreate(comm, &a->cmat));
1532     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1533     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1534     else {
1535       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1536       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1537       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1538     }
1539     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1540     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1541   } else {
1542     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1543     if (same && a->cmat->rmap->N != A->rmap->N) {
1544       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1545       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1546     }
1547     if (!same) {
1548       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1549       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1550       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1551       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1552       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1553     }
1554     if (cend - cbegin != a->cmat->cmap->N) {
1555       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1556       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1557       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1558       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1559     }
1560   }
1561   c = (Mat_MPIDense *)a->cmat->data;
1562   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1563   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1564 
1565   a->cmat->preallocated = PETSC_TRUE;
1566   a->cmat->assembled    = PETSC_TRUE;
1567 #if defined(PETSC_HAVE_DEVICE)
1568   a->cmat->offloadmask = c->A->offloadmask;
1569 #endif
1570   a->matinuse = cbegin + 1;
1571   *v          = a->cmat;
1572   PetscFunctionReturn(PETSC_SUCCESS);
1573 }
1574 
1575 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1576 {
1577   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1578   Mat_MPIDense *c;
1579 
1580   PetscFunctionBegin;
1581   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1582   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1583   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1584   a->matinuse = 0;
1585   c           = (Mat_MPIDense *)a->cmat->data;
1586   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1587   if (v) *v = NULL;
1588 #if defined(PETSC_HAVE_DEVICE)
1589   A->offloadmask = a->A->offloadmask;
1590 #endif
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
1593 
1594 /*MC
1595    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1596 
1597    Options Database Key:
1598 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1599 
1600   Level: beginner
1601 
1602 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1603 M*/
1604 PetscErrorCode MatCreate_MPIDense(Mat mat)
1605 {
1606   Mat_MPIDense *a;
1607 
1608   PetscFunctionBegin;
1609   PetscCall(PetscNew(&a));
1610   mat->data   = (void *)a;
1611   mat->ops[0] = MatOps_Values;
1612 
1613   mat->insertmode = NOT_SET_VALUES;
1614 
1615   /* build stash to cache off-process entries generated during MatSetValues() */
1616   a->donotstash = PETSC_FALSE;
1617 
1618   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1619 
1620   /* stuff used for matrix vector multiply */
1621   a->lvec        = NULL;
1622   a->Mvctx       = NULL;
1623   a->roworiented = PETSC_TRUE;
1624 
1625   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1626   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1627   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1628   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1629   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1630   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1631   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1632   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1633   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1634   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1635   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1636   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1637   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1638   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1639   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1640   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1641   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1642   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1643   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1644   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1645   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1646 #if defined(PETSC_HAVE_ELEMENTAL)
1647   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1648 #endif
1649 #if defined(PETSC_HAVE_SCALAPACK)
1650   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1651 #endif
1652 #if defined(PETSC_HAVE_CUDA)
1653   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1654 #endif
1655   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1656   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1657   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1658 #if defined(PETSC_HAVE_CUDA)
1659   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1660   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1661 #endif
1662 #if defined(PETSC_HAVE_HIP)
1663   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1664   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1666 #endif
1667   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1669   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 /*MC
1674    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1675 
1676    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1677    and `MATMPIDENSE` otherwise.
1678 
1679    Options Database Key:
1680 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1681 
1682   Level: beginner
1683 
1684 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1685 M*/
1686 
1687 /*@C
1688   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1689 
1690   Collective
1691 
1692   Input Parameters:
1693 + B    - the matrix
1694 - data - optional location of matrix data.  Set to `NULL` for PETSc
1695    to control all matrix memory allocation.
1696 
1697   Level: intermediate
1698 
1699   Notes:
1700   The dense format is fully compatible with standard Fortran
1701   storage by columns.
1702 
1703   The data input variable is intended primarily for Fortran programmers
1704   who wish to allocate their own matrix memory space.  Most users should
1705   set `data` to `NULL`.
1706 
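  Example Usage:
  A minimal sketch (assuming `comm`, `m`, and `n` are already defined); most users let PETSc allocate the storage by passing `NULL`.
.vb
  Mat B;

  PetscCall(MatCreate(comm, &B));
  PetscCall(MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPIDENSE));
  PetscCall(MatMPIDenseSetPreallocation(B, NULL));
.ve
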
1707 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1708 @*/
1709 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1710 {
1711   PetscFunctionBegin;
1712   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1713   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 /*@
1718   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1719   array provided by the user. This is useful to avoid copying an array
1720   into a matrix.
1721 
1722   Not Collective
1723 
1724   Input Parameters:
1725 + mat   - the matrix
1726 - array - the array in column major order
1727 
1728   Level: developer
1729 
1730   Note:
1731   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1732   freed when the matrix is destroyed.
1733 
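  Example Usage:
  A minimal sketch; `mat` is assumed to be an assembled `MATDENSE` matrix and `myarray` a user-managed array, in column-major order, of the correct local size.
.vb
  PetscCall(MatDensePlaceArray(mat, myarray)); /* mat now operates on myarray */
  /* ... use mat ... */
  PetscCall(MatDenseResetArray(mat)); /* return to the original array */
.ve
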
1734 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1735           `MatDenseReplaceArray()`
1736 @*/
1737 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1738 {
1739   PetscFunctionBegin;
1740   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1741   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1742   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1743 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1744   mat->offloadmask = PETSC_OFFLOAD_CPU;
1745 #endif
1746   PetscFunctionReturn(PETSC_SUCCESS);
1747 }
1748 
1749 /*@
1750   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
1751 
1752   Not Collective
1753 
1754   Input Parameter:
1755 . mat - the matrix
1756 
1757   Level: developer
1758 
1759   Note:
1760   You can only call this after a call to `MatDensePlaceArray()`
1761 
1762 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1763 @*/
1764 PetscErrorCode MatDenseResetArray(Mat mat)
1765 {
1766   PetscFunctionBegin;
1767   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1768   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1769   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 /*@
1774   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1775   array provided by the user. This is useful to avoid copying an array
1776   into a matrix.
1777 
1778   Not Collective
1779 
1780   Input Parameters:
1781 + mat   - the matrix
1782 - array - the array in column major order
1783 
1784   Level: developer
1785 
1786   Note:
1787   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1788   freed by the user. It will be freed when the matrix is destroyed.
1789 
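  Example Usage:
  A minimal sketch; `lda` and `n` are assumed to describe the local storage of `mat`.
.vb
  PetscScalar *newarray;

  PetscCall(PetscMalloc1(lda * n, &newarray));
  /* ... fill newarray in column-major order ... */
  PetscCall(MatDenseReplaceArray(mat, newarray)); /* mat takes ownership, do not free newarray */
.ve
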
1790 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1791 @*/
1792 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1793 {
1794   PetscFunctionBegin;
1795   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1796   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1797   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1798 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1799   mat->offloadmask = PETSC_OFFLOAD_CPU;
1800 #endif
1801   PetscFunctionReturn(PETSC_SUCCESS);
1802 }
1803 
1804 /*@C
1805   MatCreateDense - Creates a matrix in `MATDENSE` format.
1806 
1807   Collective
1808 
1809   Input Parameters:
1810 + comm - MPI communicator
1811 . m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1812 . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1813 . M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1814 . N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1815 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1816    to control all matrix memory allocation.
1817 
1818   Output Parameter:
1819 . A - the matrix
1820 
1821   Level: intermediate
1822 
1823   Notes:
1824   The dense format is fully compatible with standard Fortran
1825   storage by columns.
1826 
1827   Although local portions of the matrix are stored in column-major
1828   order, the matrix is partitioned across MPI ranks by row.
1829 
1830   The data input variable is intended primarily for Fortran programmers
1831   who wish to allocate their own matrix memory space.  Most users should
1832   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1833 
1834   The user MUST specify either the local or global matrix dimensions
1835   (possibly both).
1836 
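  Example Usage:
  An illustrative sketch that lets PETSc choose the row and column distribution and allocate the storage.
.vb
  Mat A;

  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
  /* ... fill A with MatSetValues() or MatDenseGetArray() ... */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve
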
1837 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1838 @*/
1839 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1840 {
1841   PetscFunctionBegin;
1842   PetscCall(MatCreate(comm, A));
1843   PetscCall(MatSetSizes(*A, m, n, M, N));
1844   PetscCall(MatSetType(*A, MATDENSE));
1845   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1846   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1847   PetscFunctionReturn(PETSC_SUCCESS);
1848 }
1849 
1850 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1851 {
1852   Mat           mat;
1853   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1854 
1855   PetscFunctionBegin;
1856   *newmat = NULL;
1857   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1858   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1859   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1860   a = (Mat_MPIDense *)mat->data;
1861 
1862   mat->factortype   = A->factortype;
1863   mat->assembled    = PETSC_TRUE;
1864   mat->preallocated = PETSC_TRUE;
1865 
1866   mat->insertmode = NOT_SET_VALUES;
1867   a->donotstash   = oldmat->donotstash;
1868 
1869   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1870   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1871 
1872   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1873 
1874   *newmat = mat;
1875   PetscFunctionReturn(PETSC_SUCCESS);
1876 }
1877 
1878 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1879 {
1880   PetscBool isbinary;
1881 #if defined(PETSC_HAVE_HDF5)
1882   PetscBool ishdf5;
1883 #endif
1884 
1885   PetscFunctionBegin;
1886   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1887   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1888   /* force binary viewer to load .info file if it has not yet done so */
1889   PetscCall(PetscViewerSetUp(viewer));
1890   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1891 #if defined(PETSC_HAVE_HDF5)
1892   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1893 #endif
1894   if (isbinary) {
1895     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1896 #if defined(PETSC_HAVE_HDF5)
1897   } else if (ishdf5) {
1898     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
1899 #endif
1900   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1901   PetscFunctionReturn(PETSC_SUCCESS);
1902 }
1903 
1904 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1905 {
1906   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1907   Mat           a, b;
1908 
1909   PetscFunctionBegin;
1910   a = matA->A;
1911   b = matB->A;
1912   PetscCall(MatEqual(a, b, flag));
1913   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1918 {
1919   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1920 
1921   PetscFunctionBegin;
1922   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1923   PetscCall(MatDestroy(&atb->atb));
1924   PetscCall(PetscFree(atb));
1925   PetscFunctionReturn(PETSC_SUCCESS);
1926 }
1927 
1928 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1929 {
1930   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1931 
1932   PetscFunctionBegin;
1933   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1934   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1935   PetscCall(PetscFree(abt));
1936   PetscFunctionReturn(PETSC_SUCCESS);
1937 }
1938 
1939 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1940 {
1941   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
1942   Mat_TransMatMultDense *atb;
1943   MPI_Comm               comm;
1944   PetscMPIInt            size, *recvcounts;
1945   PetscScalar           *carray, *sendbuf;
1946   const PetscScalar     *atbarray;
1947   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
1948   const PetscInt        *ranges;
1949 
1950   PetscFunctionBegin;
1951   MatCheckProduct(C, 3);
1952   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
1953   atb        = (Mat_TransMatMultDense *)C->product->data;
1954   recvcounts = atb->recvcounts;
1955   sendbuf    = atb->sendbuf;
1956 
1957   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1958   PetscCallMPI(MPI_Comm_size(comm, &size));
1959 
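  /*
     Each rank first forms the full-sized partial product A_loc^T * B_loc from its own row blocks;
     MPI_Reduce_scatter() below then sums these partial products across ranks while leaving every
     rank with only the rows of C it owns.
  */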
1960   /* compute atbarray = aseq^T * bseq */
1961   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
1962 
1963   PetscCall(MatGetOwnershipRanges(C, &ranges));
1964 
1965   /* arrange atbarray into sendbuf */
1966   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
1967   PetscCall(MatDenseGetLDA(atb->atb, &lda));
1968   for (proc = 0, k = 0; proc < size; proc++) {
1969     for (j = 0; j < cN; j++) {
1970       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
1971     }
1972   }
1973   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
1974 
1975   /* sum all atbarray to local values of C */
1976   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
1977   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
1978   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
1979   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1980   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1985 {
1986   MPI_Comm               comm;
1987   PetscMPIInt            size;
1988   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1989   Mat_TransMatMultDense *atb;
1990   PetscBool              cisdense = PETSC_FALSE;
1991   PetscInt               i;
1992   const PetscInt        *ranges;
1993 
1994   PetscFunctionBegin;
1995   MatCheckProduct(C, 4);
1996   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
1997   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1998   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1999     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2000   }
2001 
2002   /* create matrix product C */
2003   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2004   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
2010   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2011   PetscCall(MatSetUp(C));
2012 
2013   /* create data structure for reuse C */
2014   PetscCallMPI(MPI_Comm_size(comm, &size));
2015   PetscCall(PetscNew(&atb));
2016   cM = C->rmap->N;
2017   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2018   PetscCall(MatGetOwnershipRanges(C, &ranges));
2019   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2020 
2021   C->product->data    = atb;
2022   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2023   PetscFunctionReturn(PETSC_SUCCESS);
2024 }
2025 
2026 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2027 {
2028   MPI_Comm               comm;
2029   PetscMPIInt            i, size;
2030   PetscInt               maxRows, bufsiz;
2031   PetscMPIInt            tag;
2032   PetscInt               alg;
2033   Mat_MatTransMultDense *abt;
2034   Mat_Product           *product = C->product;
2035   PetscBool              flg;
2036 
2037   PetscFunctionBegin;
2038   MatCheckProduct(C, 4);
2039   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2040   /* check local size of A and B */
2041   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2042 
2043   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2044   alg = flg ? 0 : 1;
2045 
2046   /* setup matrix product C */
2047   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2048   PetscCall(MatSetType(C, MATMPIDENSE));
2049   PetscCall(MatSetUp(C));
2050   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2051 
2052   /* create data structure for reuse C */
2053   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2054   PetscCallMPI(MPI_Comm_size(comm, &size));
2055   PetscCall(PetscNew(&abt));
2056   abt->tag = tag;
2057   abt->alg = alg;
2058   switch (alg) {
2059   case 1: /* alg: "cyclic" */
2060     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2061     bufsiz = A->cmap->N * maxRows;
2062     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
2063     break;
2064   default: /* alg: "allgatherv" */
2065     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2066     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
2067     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2068     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2069     break;
2070   }
2071 
2072   C->product->data                = abt;
2073   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2074   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2075   PetscFunctionReturn(PETSC_SUCCESS);
2076 }
2077 
2078 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2079 {
2080   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2081   Mat_MatTransMultDense *abt;
2082   MPI_Comm               comm;
2083   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2084   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2085   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2086   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2087   const PetscScalar     *av, *bv;
2088   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2089   MPI_Request            reqs[2];
2090   const PetscInt        *ranges;
2091 
2092   PetscFunctionBegin;
2093   MatCheckProduct(C, 3);
2094   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2095   abt = (Mat_MatTransMultDense *)C->product->data;
2096   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2097   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2098   PetscCallMPI(MPI_Comm_size(comm, &size));
2099   PetscCall(MatDenseGetArrayRead(a->A, &av));
2100   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2101   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2102   PetscCall(MatDenseGetLDA(a->A, &i));
2103   PetscCall(PetscBLASIntCast(i, &alda));
2104   PetscCall(MatDenseGetLDA(b->A, &i));
2105   PetscCall(PetscBLASIntCast(i, &blda));
2106   PetscCall(MatDenseGetLDA(c->A, &i));
2107   PetscCall(PetscBLASIntCast(i, &clda));
2108   PetscCall(MatGetOwnershipRanges(B, &ranges));
2109   bn = B->rmap->n;
2110   if (blda == bn) {
2111     sendbuf = (PetscScalar *)bv;
2112   } else {
2113     sendbuf = abt->buf[0];
2114     for (k = 0, i = 0; i < cK; i++) {
2115       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2116     }
2117   }
2118   if (size > 1) {
2119     sendto   = (rank + size - 1) % size;
2120     recvfrom = (rank + size + 1) % size;
2121   } else {
2122     sendto = recvfrom = 0;
2123   }
2124   PetscCall(PetscBLASIntCast(cK, &ck));
2125   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2126   recvisfrom = rank;
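  /*
     Ring algorithm: at each step a rank multiplies its local block of A with the (transposed) rows of B
     currently held in sendbuf, filling the corresponding column block of the local part of C, while the
     next block of B rows circulates around the ring via nonblocking send/receive.
  */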
2127   for (i = 0; i < size; i++) {
2128     /* we have finished receiving and sending; the buffers can now be read or modified */
2129     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2130     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2131 
2132     if (nextrecvisfrom != rank) {
2133       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2134       sendsiz = cK * bn;
2135       recvsiz = cK * nextbn;
2136       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2137       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2138       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2139     }
2140 
2141     /* local aseq * sendbuf^T */
2142     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2143     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2144 
2145     if (nextrecvisfrom != rank) {
2146       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2147       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2148     }
2149     bn         = nextbn;
2150     recvisfrom = nextrecvisfrom;
2151     sendbuf    = recvbuf;
2152   }
2153   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2154   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2155   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2156   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2157   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2158   PetscFunctionReturn(PETSC_SUCCESS);
2159 }
2160 
2161 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2162 {
2163   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2164   Mat_MatTransMultDense *abt;
2165   MPI_Comm               comm;
2166   PetscMPIInt            size;
2167   PetscScalar           *cv, *sendbuf, *recvbuf;
2168   const PetscScalar     *av, *bv;
2169   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2170   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2171   PetscBLASInt           cm, cn, ck, alda, clda;
2172 
2173   PetscFunctionBegin;
2174   MatCheckProduct(C, 3);
2175   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2176   abt = (Mat_MatTransMultDense *)C->product->data;
2177   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2178   PetscCallMPI(MPI_Comm_size(comm, &size));
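  /* Gather the transpose of all of B onto every rank with MPI_Allgatherv(), then form the locally owned
     rows of C = A * B^T with a single sequential GEMM */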
2179   PetscCall(MatDenseGetArrayRead(a->A, &av));
2180   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2181   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2182   PetscCall(MatDenseGetLDA(a->A, &i));
2183   PetscCall(PetscBLASIntCast(i, &alda));
2184   PetscCall(MatDenseGetLDA(b->A, &blda));
2185   PetscCall(MatDenseGetLDA(c->A, &i));
2186   PetscCall(PetscBLASIntCast(i, &clda));
2187   /* copy transpose of B into buf[0] */
2188   bn      = B->rmap->n;
2189   sendbuf = abt->buf[0];
2190   recvbuf = abt->buf[1];
2191   for (k = 0, j = 0; j < bn; j++) {
2192     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2193   }
2194   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2195   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2196   PetscCall(PetscBLASIntCast(cK, &ck));
2197   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2198   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2199   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2200   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2202   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2203   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2204   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2205   PetscFunctionReturn(PETSC_SUCCESS);
2206 }
2207 
2208 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2209 {
2210   Mat_MatTransMultDense *abt;
2211 
2212   PetscFunctionBegin;
2213   MatCheckProduct(C, 3);
2214   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2215   abt = (Mat_MatTransMultDense *)C->product->data;
2216   switch (abt->alg) {
2217   case 1:
2218     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2219     break;
2220   default:
2221     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2222     break;
2223   }
2224   PetscFunctionReturn(PETSC_SUCCESS);
2225 }
2226 
2227 #if defined(PETSC_HAVE_ELEMENTAL)
2228 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2229 {
2230   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2231 
2232   PetscFunctionBegin;
2233   PetscCall(MatDestroy(&ab->Ce));
2234   PetscCall(MatDestroy(&ab->Ae));
2235   PetscCall(MatDestroy(&ab->Be));
2236   PetscCall(PetscFree(ab));
2237   PetscFunctionReturn(PETSC_SUCCESS);
2238 }
2239 
2240 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2241 {
2242   Mat_MatMultDense *ab;
2243 
2244   PetscFunctionBegin;
2245   MatCheckProduct(C, 3);
2246   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2247   ab = (Mat_MatMultDense *)C->product->data;
2248   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2249   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2250   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2251   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2252   PetscFunctionReturn(PETSC_SUCCESS);
2253 }
2254 
2255 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2256 {
2257   Mat               Ae, Be, Ce;
2258   Mat_MatMultDense *ab;
2259 
2260   PetscFunctionBegin;
2261   MatCheckProduct(C, 4);
2262   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2263   /* check local size of A and B */
2264   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2265     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
2266   }
2267 
2268   /* create elemental matrices Ae and Be */
2269   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2270   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2271   PetscCall(MatSetType(Ae, MATELEMENTAL));
2272   PetscCall(MatSetUp(Ae));
2273   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2274 
2275   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2276   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2277   PetscCall(MatSetType(Be, MATELEMENTAL));
2278   PetscCall(MatSetUp(Be));
2279   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2280 
2281   /* compute symbolic Ce = Ae*Be */
2282   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2283   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2284 
2285   /* setup C */
2286   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2287   PetscCall(MatSetType(C, MATDENSE));
2288   PetscCall(MatSetUp(C));
2289 
2290   /* create data structure for reuse Cdense */
2291   PetscCall(PetscNew(&ab));
2292   ab->Ae = Ae;
2293   ab->Be = Be;
2294   ab->Ce = Ce;
2295 
2296   C->product->data       = ab;
2297   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2298   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2299   PetscFunctionReturn(PETSC_SUCCESS);
2300 }
2301 #endif
2302 
2303 #if defined(PETSC_HAVE_ELEMENTAL)
2304 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2305 {
2306   PetscFunctionBegin;
2307   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2308   C->ops->productsymbolic = MatProductSymbolic_AB;
2309   PetscFunctionReturn(PETSC_SUCCESS);
2310 }
2311 #endif
2312 
2313 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2314 {
2315   Mat_Product *product = C->product;
2316   Mat          A = product->A, B = product->B;
2317 
2318   PetscFunctionBegin;
2319   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2320     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2321   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2322   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2323   PetscFunctionReturn(PETSC_SUCCESS);
2324 }
2325 
2326 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2327 {
2328   Mat_Product *product     = C->product;
2329   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2330   PetscInt     alg, nalg = 2;
2331   PetscBool    flg = PETSC_FALSE;
2332 
2333   PetscFunctionBegin;
2334   /* Set default algorithm */
2335   alg = 0; /* default is allgatherv */
2336   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2337   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2338 
2339   /* Get runtime option */
2340   if (product->api_user) {
2341     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2342     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2343     PetscOptionsEnd();
2344   } else {
2345     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2346     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2347     PetscOptionsEnd();
2348   }
2349   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2350 
2351   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2352   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2353   PetscFunctionReturn(PETSC_SUCCESS);
2354 }
2355 
2356 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2357 {
2358   Mat_Product *product = C->product;
2359 
2360   PetscFunctionBegin;
2361   switch (product->type) {
2362 #if defined(PETSC_HAVE_ELEMENTAL)
2363   case MATPRODUCT_AB:
2364     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2365     break;
2366 #endif
2367   case MATPRODUCT_AtB:
2368     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2369     break;
2370   case MATPRODUCT_ABt:
2371     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2372     break;
2373   default:
2374     break;
2375   }
2376   PetscFunctionReturn(PETSC_SUCCESS);
2377 }
2378