xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5a67bb2bab7ce296578be4e8d1213f21203fd3df)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13   matrix that represents the operator. For sequential matrices it returns itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
/* Return the on-diagonal block of A as a sequential dense matrix.
   The block is created once and cached on A under the key "DiagonalBlock";
   it shares its data array with the inner local matrix mdn->A (no copy). */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* Point B at column rstart of the local array.
       NOTE(review): the offset m*rstart assumes the inner matrix has lda == m -- confirm */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* The composed reference keeps B alive; MatDestroy() only drops this function's reference */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
129 
/* Insert or add a logically dense block of values into the matrix.

   Rows owned by this rank go straight into the inner sequential matrix (with
   local row indices); off-process rows are recorded in the matrix stash and
   communicated during MatAssemblyBegin/End. Negative indices are skipped. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) { /* locally owned row */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: entry (i,j) of the input is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative columns are ignored */
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* Off-process row: stash it; assembly will ship it to the owning rank */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/* Set the leading dimension of the locally stored array.
   If the inner sequential matrix does not exist yet, it is created first
   (type-matched to the MPI matrix: CUDA/HIP variants map to their sequential
   counterparts) so the LDA can be recorded before preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* The inner matrix holds all local rows and ALL global columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
/* Extract the submatrix A(isrow, iscol) into a new (or reused) MPI dense matrix.
   The column index set is gathered onto every rank (each rank stores all
   columns), then the selected entries are copied column by column. The row
   partitioning of the result must match that of A (no redistribution). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* Every rank needs the full column selection since it stores all columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* bv advances contiguously through the result; NOTE(review): this assumes the
     result's lda equals its local row count -- confirm for reused matrices */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
/* Finish assembly: receive stashed entries from other ranks, insert them
   (grouped by row for efficiency), then assemble the inner local matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 means no more incoming messages */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* Propagate assembly to the inner sequential matrix */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
/* Zero the given global rows of A. If x and b are provided, also set
   b[r] = diag * x[r] for each locally owned zeroed row r; if diag != 0,
   place diag on the diagonal of the zeroed rows (via a full diagonal set). */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* Zero the rows in the local matrix; the diagonal is handled below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this inserts diag on EVERY diagonal entry, not only the
       zeroed rows -- matches dense semantics only if that is intended; confirm */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
465 
466 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
470 
/* y = A*x: broadcast the entire x into the local work vector lvec via the
   PetscSF (memtype-aware, so device arrays are handled), then apply the
   inner sequential matrix. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern and lvec */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* Local product: inner matrix holds all columns, so no further communication */
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
489 
/* z = y + A*x: same gather of x into lvec as MatMult_MPIDense, then the
   inner matrix's mult-add. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern and lvec */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
508 
/* y = A^T*x: apply the local transpose into lvec (one partial result per
   global entry), then sum-reduce the partial results into y via the PetscSF. */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern and lvec */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* y accumulates reduced contributions, so it must start at zero */
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
528 
/* z = y + A^T*x: seed z with y, compute the local transpose product into
   lvec, then sum-reduce the partial results into z via the PetscSF. */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern and lvec */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* Start from y so the reduction adds A^T*x on top of it */
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
548 
549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
550 {
551   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
553   PetscScalar       *x;
554   const PetscScalar *av;
555 
556   PetscFunctionBegin;
557   PetscCall(VecGetArray(v, &x));
558   PetscCall(VecGetSize(v, &ng));
559   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
560   PetscCall(VecGetLocalSize(v, &nl));
561   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
562   radd = A->rmap->rstart * m;
563   PetscCall(MatDenseGetArrayRead(a->A, &av));
564   PetscCall(MatDenseGetLDA(a->A, &lda));
565   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
566   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
567   PetscCall(PetscArrayzero(x + i, nl - i));
568   PetscCall(VecRestoreArray(v, &x));
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
/* Destroy the MPI dense matrix: release the inner sequential matrix, the
   communication objects and cached views, free the private data, and detach
   every composed method/conversion hook so the object can be retyped. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* Outstanding column-vector or submatrix views would dangle after destruction */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Inner local matrix, multiply work vector, SF, and cached column vec/mat */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* Detach all composed methods registered at creation/conversion time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA-specific product, conversion, and device-array hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP-specific product, conversion, and device-array hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* Drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
666 
667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
668 
669 #include <petscdraw.h>
670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671 {
672   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
673   PetscMPIInt       rank;
674   PetscViewerType   vtype;
675   PetscBool         iascii, isdraw;
676   PetscViewer       sviewer;
677   PetscViewerFormat format;
678 
679   PetscFunctionBegin;
680   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683   if (iascii) {
684     PetscCall(PetscViewerGetType(viewer, &vtype));
685     PetscCall(PetscViewerGetFormat(viewer, &format));
686     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687       MatInfo info;
688       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691                                                    (PetscInt)info.memory));
692       PetscCall(PetscViewerFlush(viewer));
693       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695       PetscFunctionReturn(PETSC_SUCCESS);
696     } else if (format == PETSC_VIEWER_ASCII_INFO) {
697       PetscFunctionReturn(PETSC_SUCCESS);
698     }
699   } else if (isdraw) {
700     PetscDraw draw;
701     PetscBool isnull;
702 
703     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704     PetscCall(PetscDrawIsNull(draw, &isnull));
705     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706   }
707 
708   {
709     /* assemble the entire matrix onto first processor. */
710     Mat          A;
711     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712     PetscInt    *cols;
713     PetscScalar *vals;
714 
715     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716     if (rank == 0) {
717       PetscCall(MatSetSizes(A, M, N, M, N));
718     } else {
719       PetscCall(MatSetSizes(A, 0, 0, M, N));
720     }
721     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722     PetscCall(MatSetType(A, MATMPIDENSE));
723     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
724 
725     /* Copy the matrix ... This isn't the most efficient means,
726        but it's quick for now */
727     A->insertmode = INSERT_VALUES;
728 
729     row = mat->rmap->rstart;
730     m   = mdn->A->rmap->n;
731     for (i = 0; i < m; i++) {
732       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735       row++;
736     }
737 
738     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741     if (rank == 0) {
742       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744     }
745     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746     PetscCall(MatDestroy(&A));
747   }
748   PetscFunctionReturn(PETSC_SUCCESS);
749 }
750 
751 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
752 {
753   PetscBool iascii, isbinary, isdraw, issocket;
754 
755   PetscFunctionBegin;
756   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
757   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
758   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
760 
761   if (iascii || issocket || isdraw) {
762     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
763   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
768 {
769   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
770   Mat            mdn = mat->A;
771   PetscLogDouble isend[5], irecv[5];
772 
773   PetscFunctionBegin;
774   info->block_size = 1.0;
775 
776   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
777 
778   isend[0] = info->nz_used;
779   isend[1] = info->nz_allocated;
780   isend[2] = info->nz_unneeded;
781   isend[3] = info->memory;
782   isend[4] = info->mallocs;
783   if (flag == MAT_LOCAL) {
784     info->nz_used      = isend[0];
785     info->nz_allocated = isend[1];
786     info->nz_unneeded  = isend[2];
787     info->memory       = isend[3];
788     info->mallocs      = isend[4];
789   } else if (flag == MAT_GLOBAL_MAX) {
790     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
791 
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   } else if (flag == MAT_GLOBAL_SUM) {
798     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
799 
800     info->nz_used      = irecv[0];
801     info->nz_allocated = irecv[1];
802     info->nz_unneeded  = irecv[2];
803     info->memory       = irecv[3];
804     info->mallocs      = irecv[4];
805   }
806   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
807   info->fill_ratio_needed = 0;
808   info->factor_mallocs    = 0;
809   PetscFunctionReturn(PETSC_SUCCESS);
810 }
811 
/*
  Handle MatSetOption() for MATMPIDENSE: options affecting insertion are
  forwarded to the local sequential block, structural/symmetry hints are
  logged and ignored, and unknown options are an error.
*/
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* These only matter for the storage of the local block; forward them. */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* Track the orientation locally (used by MatSetValues) and forward it. */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* Options that have no effect on a dense matrix; note and ignore. */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* Skip communication of off-process values during assembly. */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
855 
/*
  Compute A = diag(ll) * A * diag(rr) in place.

  ll (if given) is distributed like the rows of A; each local row i is scaled
  by ll[i]. rr (if given) is distributed like the columns of A; its entries are
  first gathered into the full-length local vector lvec via the Mvctx star
  forest, then each local column is scaled.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  /* lda may exceed m; step between columns must use lda, not m. */
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      /* Walk along row i: consecutive entries of a row are lda apart (column-major). */
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* Lazily build the communication pattern the first time it is needed. */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* Gather all entries of rr into the local (full-length) vector lvec. */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      /* Column i is contiguous in memory starting at vv + i*lda. */
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
904 
905 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
906 {
907   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
908   PetscInt           i, j;
909   PetscMPIInt        size;
910   PetscReal          sum = 0.0;
911   const PetscScalar *av, *v;
912 
913   PetscFunctionBegin;
914   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
915   v = av;
916   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
917   if (size == 1) {
918     PetscCall(MatNorm(mdn->A, type, nrm));
919   } else {
920     if (type == NORM_FROBENIUS) {
921       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
922         sum += PetscRealPart(PetscConj(*v) * (*v));
923         v++;
924       }
925       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
926       *nrm = PetscSqrtReal(*nrm);
927       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
928     } else if (type == NORM_1) {
929       PetscReal *tmp, *tmp2;
930       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
931       *nrm = 0.0;
932       v    = av;
933       for (j = 0; j < mdn->A->cmap->n; j++) {
934         for (i = 0; i < mdn->A->rmap->n; i++) {
935           tmp[j] += PetscAbsScalar(*v);
936           v++;
937         }
938       }
939       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
940       for (j = 0; j < A->cmap->N; j++) {
941         if (tmp2[j] > *nrm) *nrm = tmp2[j];
942       }
943       PetscCall(PetscFree2(tmp, tmp2));
944       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
945     } else if (type == NORM_INFINITY) { /* max row norm */
946       PetscReal ntemp;
947       PetscCall(MatNorm(mdn->A, type, &ntemp));
948       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
949     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
950   }
951   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
952   PetscFunctionReturn(PETSC_SUCCESS);
953 }
954 
/*
  Transpose a MATMPIDENSE matrix.

  For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a fresh matrix B with the
  transposed layout is created; for MAT_REUSE_MATRIX the caller-supplied
  *matout is filled. Entries are transferred one local column at a time via
  MatSetValues (column j of A becomes row j of B), so assembly handles the
  interprocess redistribution. For in-place transposition B is merged back
  into A's header at the end.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B has A's local column count as its local row count (and vice versa). */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of this rank's local rows of A,
     which become the column indices of B. */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* Column j of A (contiguous, length m) is inserted as row j of B. */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* In-place: replace A's innards with B's while keeping A's header. */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
993 
994 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
995 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
996 
997 static PetscErrorCode MatSetUp_MPIDense(Mat A)
998 {
999   PetscFunctionBegin;
1000   PetscCall(PetscLayoutSetUp(A->rmap));
1001   PetscCall(PetscLayoutSetUp(A->cmap));
1002   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1003   PetscFunctionReturn(PETSC_SUCCESS);
1004 }
1005 
1006 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1007 {
1008   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1009 
1010   PetscFunctionBegin;
1011   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1012   PetscFunctionReturn(PETSC_SUCCESS);
1013 }
1014 
1015 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1016 {
1017   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1018 
1019   PetscFunctionBegin;
1020   PetscCall(MatConjugate(a->A));
1021   PetscFunctionReturn(PETSC_SUCCESS);
1022 }
1023 
1024 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1025 {
1026   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1027 
1028   PetscFunctionBegin;
1029   PetscCall(MatRealPart(a->A));
1030   PetscFunctionReturn(PETSC_SUCCESS);
1031 }
1032 
1033 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1034 {
1035   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1036 
1037   PetscFunctionBegin;
1038   PetscCall(MatImaginaryPart(a->A));
1039   PetscFunctionReturn(PETSC_SUCCESS);
1040 }
1041 
1042 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1043 {
1044   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1045 
1046   PetscFunctionBegin;
1047   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1048   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1049   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1050   PetscFunctionReturn(PETSC_SUCCESS);
1051 }
1052 
1053 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1054 
1055 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1056 {
1057   PetscInt      i, m, n;
1058   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1059   PetscReal    *work;
1060 
1061   PetscFunctionBegin;
1062   PetscCall(MatGetSize(A, &m, &n));
1063   PetscCall(PetscMalloc1(n, &work));
1064   if (type == REDUCTION_MEAN_REALPART) {
1065     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1066   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1067     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1068   } else {
1069     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1070   }
1071   if (type == NORM_2) {
1072     for (i = 0; i < n; i++) work[i] *= work[i];
1073   }
1074   if (type == NORM_INFINITY) {
1075     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1076   } else {
1077     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1078   }
1079   PetscCall(PetscFree(work));
1080   if (type == NORM_2) {
1081     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1082   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1083     for (i = 0; i < n; i++) reductions[i] /= m;
1084   }
1085   PetscFunctionReturn(PETSC_SUCCESS);
1086 }
1087 
1088 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1089 {
1090   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1091 
1092   PetscFunctionBegin;
1093   PetscCall(MatSetRandom(d->A, rctx));
1094 #if defined(PETSC_HAVE_DEVICE)
1095   x->offloadmask = d->A->offloadmask;
1096 #endif
1097   PetscFunctionReturn(PETSC_SUCCESS);
1098 }
1099 
1100 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1101 {
1102   PetscFunctionBegin;
1103   *missing = PETSC_FALSE;
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1108 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1109 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1110 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1111 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1112 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1113 
/* Function table for MATMPIDENSE. Entries are positional (see struct _MatOps);
   the numbered comments mark every fifth slot. NULL means the operation is
   unsupported or inherited. Do not reorder — each slot's meaning is fixed. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1266 
/*
  Allocate (or adopt) storage for the local block of a MATMPIDENSE matrix.

  The inner sequential matrix has local-row x GLOBAL-column dimensions: each
  rank stores all columns of its row slice. `data`, if non-NULL, is used as
  the storage (ownership per MatSeqDenseSetPreallocation). The inner type is
  chosen to match the outer type (CUDA/HIP variants when applicable).
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* Local block: this rank's rows by ALL global columns. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* Mirror the inner block's host/device offload state on the wrapper. */
  mat->offloadmask = a->A->offloadmask;
#endif
  /* Dense matrices are usable immediately; mark assembled as well. */
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1299 
1300 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1301 {
1302   Mat B, C;
1303 
1304   PetscFunctionBegin;
1305   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1306   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1307   PetscCall(MatDestroy(&C));
1308   if (reuse == MAT_REUSE_MATRIX) {
1309     C = *newmat;
1310   } else C = NULL;
1311   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1312   PetscCall(MatDestroy(&B));
1313   if (reuse == MAT_INPLACE_MATRIX) {
1314     PetscCall(MatHeaderReplace(A, &C));
1315   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1316   PetscFunctionReturn(PETSC_SUCCESS);
1317 }
1318 
1319 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1320 {
1321   Mat B, C;
1322 
1323   PetscFunctionBegin;
1324   PetscCall(MatDenseGetLocalMatrix(A, &C));
1325   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1326   if (reuse == MAT_REUSE_MATRIX) {
1327     C = *newmat;
1328   } else C = NULL;
1329   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1330   PetscCall(MatDestroy(&B));
1331   if (reuse == MAT_INPLACE_MATRIX) {
1332     PetscCall(MatHeaderReplace(A, &C));
1333   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1334   PetscFunctionReturn(PETSC_SUCCESS);
1335 }
1336 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
  Convert a MATMPIDENSE matrix to MATELEMENTAL.

  Entries are transferred with MatSetValues using ADD_VALUES because the
  PETSc-Elemental interface implements off-process insertion as axpy (see
  comment below); the target is zeroed (reuse) or freshly created, so adding
  into it yields the copied values. Column-oriented insertion matches the
  column-major dense storage.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* Must zero first: entries are ADDed below. */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* rows: global indices of this rank's local rows; cols: all global columns. */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* When the storage is contiguous (lda == m) insert everything in one call;
     otherwise insert column by column at stride lda. */
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1381 
1382 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1383 {
1384   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1385 
1386   PetscFunctionBegin;
1387   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1388   PetscFunctionReturn(PETSC_SUCCESS);
1389 }
1390 
1391 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1392 {
1393   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1394 
1395   PetscFunctionBegin;
1396   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1401 {
1402   Mat_MPIDense *mat;
1403   PetscInt      m, nloc, N;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(MatGetSize(inmat, &m, &N));
1407   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1408   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1409     PetscInt sum;
1410 
1411     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1412     /* Check sum(n) = N */
1413     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1414     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1415 
1416     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1417     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1418   }
1419 
1420   /* numeric phase */
1421   mat = (Mat_MPIDense *)(*outmat)->data;
1422   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1423   PetscFunctionReturn(PETSC_SUCCESS);
1424 }
1425 
1426 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1427 {
1428   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1429   PetscInt      lda;
1430 
1431   PetscFunctionBegin;
1432   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1433   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1434   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1435   a->vecinuse = col + 1;
1436   PetscCall(MatDenseGetLDA(a->A, &lda));
1437   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1438   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1439   *v = a->cvec;
1440   PetscFunctionReturn(PETSC_SUCCESS);
1441 }
1442 
1443 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1444 {
1445   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1446 
1447   PetscFunctionBegin;
1448   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1449   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1450   a->vecinuse = 0;
1451   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1452   PetscCall(VecResetArray(a->cvec));
1453   if (v) *v = NULL;
1454   PetscFunctionReturn(PETSC_SUCCESS);
1455 }
1456 
1457 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1458 {
1459   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1460   PetscInt      lda;
1461 
1462   PetscFunctionBegin;
1463   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1464   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1465   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1466   a->vecinuse = col + 1;
1467   PetscCall(MatDenseGetLDA(a->A, &lda));
1468   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1469   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1470   PetscCall(VecLockReadPush(a->cvec));
1471   *v = a->cvec;
1472   PetscFunctionReturn(PETSC_SUCCESS);
1473 }
1474 
1475 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1476 {
1477   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1478 
1479   PetscFunctionBegin;
1480   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1481   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1482   a->vecinuse = 0;
1483   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1484   PetscCall(VecLockReadPop(a->cvec));
1485   PetscCall(VecResetArray(a->cvec));
1486   if (v) *v = NULL;
1487   PetscFunctionReturn(PETSC_SUCCESS);
1488 }
1489 
1490 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1491 {
1492   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1493   PetscInt      lda;
1494 
1495   PetscFunctionBegin;
1496   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1497   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1498   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1499   a->vecinuse = col + 1;
1500   PetscCall(MatDenseGetLDA(a->A, &lda));
1501   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1502   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1503   *v = a->cvec;
1504   PetscFunctionReturn(PETSC_SUCCESS);
1505 }
1506 
1507 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1508 {
1509   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1510 
1511   PetscFunctionBegin;
1512   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1513   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1514   a->vecinuse = 0;
1515   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1516   PetscCall(VecResetArray(a->cvec));
1517   if (v) *v = NULL;
1518   PetscFunctionReturn(PETSC_SUCCESS);
1519 }
1520 
/*
   Returns a parallel dense matrix view of rows [rbegin,rend) and columns [cbegin,cend)
   of A, sharing A's storage. The view is cached in a->cmat and reused across calls,
   rebuilding its layouts only when the requested sizes change. Collective on A.
*/
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the global row range to this rank's owned rows, giving local offsets */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first request: create the cached view matrix and set up its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse: rebuild the row layout only if the global or local row count changed
       (the local check must agree on all ranks, hence the allreduce) */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the view's inner sequential matrix aliases the local rows/columns of a->A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored one-based so 0 means "not checked out" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1578 
/*
   Returns the submatrix view checked out with MatDenseGetSubMatrix_MPIDense(),
   releasing the inner sequential view so A's storage can be accessed again.
*/
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the view may have migrated data between host and device; propagate its mask */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1597 
1598 /*MC
1599    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1600 
1601    Options Database Key:
1602 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1603 
1604   Level: beginner
1605 
1606 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1607 M*/
/*
   Constructor for MATMPIDENSE: allocates the per-matrix data structure, installs
   the function table, and composes the type-specific method implementations.
   The inner sequential matrix (a->A) is created later by preallocation.
*/
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MATMPIDENSE virtual function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* compose the dense-matrix API implementations queried by the generic MatDense* front ends */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions dispatched by MatConvert() */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* MatProduct implementations for sparse-dense products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1676 
1677 /*MC
1678    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1679 
1680    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1681    and `MATMPIDENSE` otherwise.
1682 
1683    Options Database Key:
1684 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1685 
1686   Level: beginner
1687 
1688 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1689 M*/
1690 
/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data.  Set to `NULL` for PETSc
   to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op when B is not a MATMPIDENSE-based type */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1720 
1721 /*@
1722   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1723   array provided by the user. This is useful to avoid copying an array
1724   into a matrix
1725 
1726   Not Collective
1727 
1728   Input Parameters:
1729 + mat   - the matrix
1730 - array - the array in column major order
1731 
1732   Level: developer
1733 
1734   Note:
1735   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1736   freed when the matrix is destroyed.
1737 
1738 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1739           `MatDenseReplaceArray()`
1740 @*/
1741 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1742 {
1743   PetscFunctionBegin;
1744   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1745   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1746   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1747 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1748   mat->offloadmask = PETSC_OFFLOAD_CPU;
1749 #endif
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 /*@
1754   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1755 
1756   Not Collective
1757 
1758   Input Parameter:
1759 . mat - the matrix
1760 
1761   Level: developer
1762 
1763   Note:
1764   You can only call this after a call to `MatDensePlaceArray()`
1765 
1766 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1767 @*/
1768 PetscErrorCode MatDenseResetArray(Mat mat)
1769 {
1770   PetscFunctionBegin;
1771   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1772   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1773   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1774   PetscFunctionReturn(PETSC_SUCCESS);
1775 }
1776 
1777 /*@
1778   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1779   array provided by the user. This is useful to avoid copying an array
1780   into a matrix
1781 
1782   Not Collective
1783 
1784   Input Parameters:
1785 + mat   - the matrix
1786 - array - the array in column major order
1787 
1788   Level: developer
1789 
1790   Note:
1791   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1792   freed by the user. It will be freed when the matrix is destroyed.
1793 
1794 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1795 @*/
1796 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1797 {
1798   PetscFunctionBegin;
1799   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1800   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1801   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1802 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1803   mat->offloadmask = PETSC_OFFLOAD_CPU;
1804 #endif
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  /* MATDENSE resolves to MATSEQDENSE on one rank and MATMPIDENSE otherwise */
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocations are attempted; only the one matching the actual type acts */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1853 
1854 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1855 {
1856   Mat           mat;
1857   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1858 
1859   PetscFunctionBegin;
1860   *newmat = NULL;
1861   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1862   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1863   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1864   a = (Mat_MPIDense *)mat->data;
1865 
1866   mat->factortype   = A->factortype;
1867   mat->assembled    = PETSC_TRUE;
1868   mat->preallocated = PETSC_TRUE;
1869 
1870   mat->insertmode = NOT_SET_VALUES;
1871   a->donotstash   = oldmat->donotstash;
1872 
1873   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1874   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1875 
1876   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1877 
1878   *newmat = mat;
1879   PetscFunctionReturn(PETSC_SUCCESS);
1880 }
1881 
/*
   Loads a parallel dense matrix from a viewer; dispatches on the viewer type,
   currently supporting binary and (when available) HDF5 viewers.
*/
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1907 
1908 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1909 {
1910   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1911   Mat           a, b;
1912 
1913   PetscFunctionBegin;
1914   a = matA->A;
1915   b = matB->A;
1916   PetscCall(MatEqual(a, b, flag));
1917   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1918   PetscFunctionReturn(PETSC_SUCCESS);
1919 }
1920 
1921 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1922 {
1923   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1924 
1925   PetscFunctionBegin;
1926   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1927   PetscCall(MatDestroy(&atb->atb));
1928   PetscCall(PetscFree(atb));
1929   PetscFunctionReturn(PETSC_SUCCESS);
1930 }
1931 
1932 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1933 {
1934   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1935 
1936   PetscFunctionBegin;
1937   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1938   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1939   PetscCall(PetscFree(abt));
1940   PetscFunctionReturn(PETSC_SUCCESS);
1941 }
1942 
/*
   Numeric phase of C = A^T * B for two MATMPIDENSE matrices: each rank forms the
   local product aseq^T * bseq, then the partial products are summed and scattered
   so every rank receives its owned rows of C via MPI_Reduce_scatter.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers were sized in the symbolic phase and are reused across calls */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: per destination rank, that rank's rows of every
     column, so each rank's contribution is contiguous for the reduce-scatter */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1987 
1988 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1989 {
1990   MPI_Comm               comm;
1991   PetscMPIInt            size;
1992   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1993   Mat_TransMatMultDense *atb;
1994   PetscBool              cisdense = PETSC_FALSE;
1995   PetscInt               i;
1996   const PetscInt        *ranges;
1997 
1998   PetscFunctionBegin;
1999   MatCheckProduct(C, 4);
2000   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2001   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2002   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2003     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2004   }
2005 
2006   /* create matrix product C */
2007   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2008 #if defined(PETSC_HAVE_CUDA)
2009   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2010 #endif
2011 #if defined(PETSC_HAVE_HIP)
2012   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2013 #endif
2014   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2015   PetscCall(MatSetUp(C));
2016 
2017   /* create data structure for reuse C */
2018   PetscCallMPI(MPI_Comm_size(comm, &size));
2019   PetscCall(PetscNew(&atb));
2020   cM = C->rmap->N;
2021   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2022   PetscCall(MatGetOwnershipRanges(C, &ranges));
2023   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2024 
2025   C->product->data    = atb;
2026   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2027   PetscFunctionReturn(PETSC_SUCCESS);
2028 }
2029 
/*
   Symbolic phase of C = A * B^T for two MATMPIDENSE matrices: sets C's sizes and
   type, selects the communication algorithm ("allgatherv" or "cyclic") from the
   product options, and allocates the buffers the numeric phase reuses.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv" (replicate B), alg 1 = "cyclic" (pass B's rows around the ring) */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* dedicated tag so the cyclic algorithm's messages cannot collide with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B that will circulate */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local rows of B, buf[1] the fully gathered B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2081 
/*
  Numeric phase of C = A * B^T using a ring ("cyclic") exchange: each rank owns a block
  of rows of B; on every step the block currently held is forwarded around the ring with
  nonblocking sends/receives while the GEMM against it proceeds, overlapping
  communication with computation.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* raw local arrays and leading dimensions of the inner sequential matrices */
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* B's local array has no lda padding: it can be sent as-is */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact B's local block into buf[0], column by column, dropping the lda padding */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next rank */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the first block multiplied is this rank's own */
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* ping-pong between the two staging buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, writing the block of columns of C owned by rank recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2164 
2165 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2166 {
2167   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2168   Mat_MatTransMultDense *abt;
2169   MPI_Comm               comm;
2170   PetscMPIInt            size;
2171   PetscScalar           *cv, *sendbuf, *recvbuf;
2172   const PetscScalar     *av, *bv;
2173   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2174   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2175   PetscBLASInt           cm, cn, ck, alda, clda;
2176 
2177   PetscFunctionBegin;
2178   MatCheckProduct(C, 3);
2179   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2180   abt = (Mat_MatTransMultDense *)C->product->data;
2181   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2182   PetscCallMPI(MPI_Comm_size(comm, &size));
2183   PetscCall(MatDenseGetArrayRead(a->A, &av));
2184   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2185   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2186   PetscCall(MatDenseGetLDA(a->A, &i));
2187   PetscCall(PetscBLASIntCast(i, &alda));
2188   PetscCall(MatDenseGetLDA(b->A, &blda));
2189   PetscCall(MatDenseGetLDA(c->A, &i));
2190   PetscCall(PetscBLASIntCast(i, &clda));
2191   /* copy transpose of B into buf[0] */
2192   bn      = B->rmap->n;
2193   sendbuf = abt->buf[0];
2194   recvbuf = abt->buf[1];
2195   for (k = 0, j = 0; j < bn; j++) {
2196     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2197   }
2198   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2199   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2200   PetscCall(PetscBLASIntCast(cK, &ck));
2201   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2202   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2203   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2204   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2205   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2206   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2207   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2208   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2209   PetscFunctionReturn(PETSC_SUCCESS);
2210 }
2211 
2212 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2213 {
2214   Mat_MatTransMultDense *abt;
2215 
2216   PetscFunctionBegin;
2217   MatCheckProduct(C, 3);
2218   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2219   abt = (Mat_MatTransMultDense *)C->product->data;
2220   switch (abt->alg) {
2221   case 1:
2222     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2223     break;
2224   default:
2225     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2226     break;
2227   }
2228   PetscFunctionReturn(PETSC_SUCCESS);
2229 }
2230 
2231 #if defined(PETSC_HAVE_ELEMENTAL)
2232 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2233 {
2234   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2235 
2236   PetscFunctionBegin;
2237   PetscCall(MatDestroy(&ab->Ce));
2238   PetscCall(MatDestroy(&ab->Ae));
2239   PetscCall(MatDestroy(&ab->Be));
2240   PetscCall(PetscFree(ab));
2241   PetscFunctionReturn(PETSC_SUCCESS);
2242 }
2243 
2244 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2245 {
2246   Mat_MatMultDense *ab;
2247 
2248   PetscFunctionBegin;
2249   MatCheckProduct(C, 3);
2250   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2251   ab = (Mat_MatMultDense *)C->product->data;
2252   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2253   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2254   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2255   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2256   PetscFunctionReturn(PETSC_SUCCESS);
2257 }
2258 
2259 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2260 {
2261   Mat               Ae, Be, Ce;
2262   Mat_MatMultDense *ab;
2263 
2264   PetscFunctionBegin;
2265   MatCheckProduct(C, 4);
2266   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2267   /* check local size of A and B */
2268   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2269     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2270   }
2271 
2272   /* create elemental matrices Ae and Be */
2273   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2274   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2275   PetscCall(MatSetType(Ae, MATELEMENTAL));
2276   PetscCall(MatSetUp(Ae));
2277   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2278 
2279   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2280   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2281   PetscCall(MatSetType(Be, MATELEMENTAL));
2282   PetscCall(MatSetUp(Be));
2283   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2284 
2285   /* compute symbolic Ce = Ae*Be */
2286   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2287   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2288 
2289   /* setup C */
2290   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2291   PetscCall(MatSetType(C, MATDENSE));
2292   PetscCall(MatSetUp(C));
2293 
2294   /* create data structure for reuse Cdense */
2295   PetscCall(PetscNew(&ab));
2296   ab->Ae = Ae;
2297   ab->Be = Be;
2298   ab->Ce = Ce;
2299 
2300   C->product->data       = ab;
2301   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2302   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2303   PetscFunctionReturn(PETSC_SUCCESS);
2304 }
2305 #endif
2306 
2307 #if defined(PETSC_HAVE_ELEMENTAL)
2308 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2309 {
2310   PetscFunctionBegin;
2311   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2312   C->ops->productsymbolic = MatProductSymbolic_AB;
2313   PetscFunctionReturn(PETSC_SUCCESS);
2314 }
2315 #endif
2316 
2317 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2318 {
2319   Mat_Product *product = C->product;
2320   Mat          A = product->A, B = product->B;
2321 
2322   PetscFunctionBegin;
2323   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2324     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2325   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2326   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2327   PetscFunctionReturn(PETSC_SUCCESS);
2328 }
2329 
2330 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2331 {
2332   Mat_Product *product     = C->product;
2333   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2334   PetscInt     alg, nalg = 2;
2335   PetscBool    flg = PETSC_FALSE;
2336 
2337   PetscFunctionBegin;
2338   /* Set default algorithm */
2339   alg = 0; /* default is allgatherv */
2340   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2341   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2342 
2343   /* Get runtime option */
2344   if (product->api_user) {
2345     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2346     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2347     PetscOptionsEnd();
2348   } else {
2349     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2350     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2351     PetscOptionsEnd();
2352   }
2353   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2354 
2355   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2356   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2357   PetscFunctionReturn(PETSC_SUCCESS);
2358 }
2359 
2360 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2361 {
2362   Mat_Product *product = C->product;
2363 
2364   PetscFunctionBegin;
2365   switch (product->type) {
2366 #if defined(PETSC_HAVE_ELEMENTAL)
2367   case MATPRODUCT_AB:
2368     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2369     break;
2370 #endif
2371   case MATPRODUCT_AtB:
2372     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2373     break;
2374   case MATPRODUCT_ABt:
2375     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2376     break;
2377   default:
2378     break;
2379   }
2380   PetscFunctionReturn(PETSC_SUCCESS);
2381 }
2382