xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision bd1587442888ecfa2176fd832616cd1e28a092f3)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14   matrix that represents the operator. For sequential matrices it returns itself.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22   Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscAssertPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
97 {
98   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
99   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
100   PetscScalar  *array;
101   MPI_Comm      comm;
102   PetscBool     flg;
103   Mat           B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatHasCongruentLayouts(A, &flg));
107   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
108   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
109   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
110 #if PetscDefined(HAVE_CUDA)
111     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
112     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
113 #elif PetscDefined(HAVE_HIP)
114     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
115     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
116 #endif
117     PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
118     PetscCall(MatCreate(comm, &B));
119     PetscCall(MatSetSizes(B, m, m, m, m));
120     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
121     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
122     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
123     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
124     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
125     *a = B;
126     PetscCall(MatDestroy(&B));
127   } else *a = B;
128   PetscFunctionReturn(PETSC_SUCCESS);
129 }
130 
131 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
132 {
133   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
134   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
135   PetscBool     roworiented = A->roworiented;
136 
137   PetscFunctionBegin;
138   for (i = 0; i < m; i++) {
139     if (idxm[i] < 0) continue;
140     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
141     if (idxm[i] >= rstart && idxm[i] < rend) {
142       row = idxm[i] - rstart;
143       if (roworiented) {
144         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
145       } else {
146         for (j = 0; j < n; j++) {
147           if (idxn[j] < 0) continue;
148           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
149           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
150         }
151       }
152     } else if (!A->donotstash) {
153       mat->assembled = PETSC_FALSE;
154       if (roworiented) {
155         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
156       } else {
157         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
158       }
159     }
160   }
161   PetscFunctionReturn(PETSC_SUCCESS);
162 }
163 
164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195 {
196   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
197   MatType       mtype = MATSEQDENSE;
198 
199   PetscFunctionBegin;
200   if (!a->A) {
201     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202     PetscCall(PetscLayoutSetUp(A->rmap));
203     PetscCall(PetscLayoutSetUp(A->cmap));
204     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206 #if PetscDefined(HAVE_CUDA)
207     PetscBool iscuda;
208     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209     if (iscuda) mtype = MATSEQDENSECUDA;
210 #elif PetscDefined(HAVE_HIP)
211     PetscBool iship;
212     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213     if (iship) mtype = MATSEQDENSEHIP;
214 #endif
215     PetscCall(MatSetType(a->A, mtype));
216   }
217   PetscCall(MatDenseSetLDA(a->A, lda));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285 {
286   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
287   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288   const PetscInt    *irow, *icol;
289   const PetscScalar *v;
290   PetscScalar       *bv;
291   Mat                newmat;
292   IS                 iscol_local;
293   MPI_Comm           comm_is, comm_mat;
294 
295   PetscFunctionBegin;
296   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
299 
300   PetscCall(ISAllGather(iscol, &iscol_local));
301   PetscCall(ISGetIndices(isrow, &irow));
302   PetscCall(ISGetIndices(iscol_local, &icol));
303   PetscCall(ISGetLocalSize(isrow, &nrows));
304   PetscCall(ISGetLocalSize(iscol, &ncols));
305   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
306 
307   /* No parallel redistribution currently supported! Should really check each index set
308      to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
309      original matrix! */
310 
311   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
313 
314   /* Check submatrix call */
315   if (scall == MAT_REUSE_MATRIX) {
316     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
317     /* Really need to test rows and column sizes! */
318     newmat = *B;
319   } else {
320     /* Create and fill new matrix */
321     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325   }
326 
327   /* Now extract the data pointers and do the copy, column at a time */
328   newmatd = (Mat_MPIDense *)newmat->data;
329   PetscCall(MatDenseGetArray(newmatd->A, &bv));
330   PetscCall(MatDenseGetArrayRead(mat->A, &v));
331   PetscCall(MatDenseGetLDA(mat->A, &lda));
332   for (i = 0; i < Ncols; i++) {
333     const PetscScalar *av = v + lda * icol[i];
334     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335   }
336   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
338 
339   /* Assemble the matrices so that the correct flags are set */
340   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
342 
343   /* Free work space */
344   PetscCall(ISRestoreIndices(isrow, &irow));
345   PetscCall(ISRestoreIndices(iscol_local, &icol));
346   PetscCall(ISDestroy(&iscol_local));
347   *B = newmat;
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
395   PetscInt      i, *row, *col, flg, j, rstart, ncols;
396   PetscMPIInt   n;
397   PetscScalar  *val;
398 
399   PetscFunctionBegin;
400   if (!mdn->donotstash && !mat->nooffprocentries) {
401     /*  wait on receives */
402     while (1) {
403       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
404       if (!flg) break;
405 
406       for (i = 0; i < n;) {
407         /* Now identify the consecutive vals belonging to the same row */
408         for (j = i, rstart = row[j]; j < n; j++) {
409           if (row[j] != rstart) break;
410         }
411         if (j < n) ncols = j - i;
412         else ncols = n - i;
413         /* Now assemble all these values with a single function call */
414         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
415         i = j;
416       }
417     }
418     PetscCall(MatStashScatterEnd_Private(&mat->stash));
419   }
420 
421   PetscCall(MatAssemblyBegin(mdn->A, mode));
422   PetscCall(MatAssemblyEnd(mdn->A, mode));
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436 {
437   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438   PetscInt      i, len, *lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443   /* fix right-hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     PetscCall(VecGetArrayRead(x, &xx));
449     PetscCall(VecGetArrayWrite(b, &bb));
450     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451     PetscCall(VecRestoreArrayRead(x, &xx));
452     PetscCall(VecRestoreArrayWrite(b, &bb));
453   }
454   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455   if (diag != 0.0) {
456     Vec d;
457 
458     PetscCall(MatCreateVecs(A, NULL, &d));
459     PetscCall(VecSet(d, diag));
460     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461     PetscCall(VecDestroy(&d));
462   }
463   PetscCall(PetscFree(lrows));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475   const PetscScalar *ax;
476   PetscScalar       *ay;
477   PetscMemType       axmtype, aymtype;
478 
479   PetscFunctionBegin;
480   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
491 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494   const PetscScalar *ax;
495   PetscScalar       *ay;
496   PetscMemType       axmtype, aymtype;
497 
498   PetscFunctionBegin;
499   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506   PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
511 {
512   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
513   const PetscScalar *ax;
514   PetscScalar       *ay;
515   PetscMemType       axmtype, aymtype;
516 
517   PetscFunctionBegin;
518   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
519   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
520   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
521   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
522   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
523   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
524   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
525   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
530 {
531   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
532   const PetscScalar *ax;
533   PetscScalar       *ay;
534   PetscMemType       axmtype, aymtype;
535 
536   PetscFunctionBegin;
537   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
538   PetscCall(VecSet(yy, 0.0));
539   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
540   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
541   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
542   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
543   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
544   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
545   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
550 {
551   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552   const PetscScalar *ax;
553   PetscScalar       *ay;
554   PetscMemType       axmtype, aymtype;
555 
556   PetscFunctionBegin;
557   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
558   PetscCall(VecSet(yy, 0.0));
559   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
560   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
561   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
562   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
563   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
564   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
565   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
566   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
567   PetscFunctionReturn(PETSC_SUCCESS);
568 }
569 
570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
571 {
572   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
573   const PetscScalar *ax;
574   PetscScalar       *ay;
575   PetscMemType       axmtype, aymtype;
576 
577   PetscFunctionBegin;
578   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
579   PetscCall(VecCopy(yy, zz));
580   PetscMPIInt rank;
581   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
582   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
583   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
584   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
585   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
586   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
587   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
588   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
592 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
593 {
594   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
595   const PetscScalar *ax;
596   PetscScalar       *ay;
597   PetscMemType       axmtype, aymtype;
598 
599   PetscFunctionBegin;
600   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
601   PetscCall(VecCopy(yy, zz));
602   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
603   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
604   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
605   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
606   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
607   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
608   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
609   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
610   PetscFunctionReturn(PETSC_SUCCESS);
611 }
612 
613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
614 {
615   PetscFunctionBegin;
616   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
617   PetscFunctionReturn(PETSC_SUCCESS);
618 }
619 
620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
621 {
622   PetscFunctionBegin;
623   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
628 {
629   PetscFunctionBegin;
630   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
631   PetscFunctionReturn(PETSC_SUCCESS);
632 }
633 
634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
635 {
636   PetscFunctionBegin;
637   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
638   PetscFunctionReturn(PETSC_SUCCESS);
639 }
640 
641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
642 {
643   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
644   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
645   PetscScalar       *x;
646   const PetscScalar *av;
647 
648   PetscFunctionBegin;
649   PetscCall(VecGetArray(v, &x));
650   PetscCall(VecGetSize(v, &ng));
651   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
652   PetscCall(VecGetLocalSize(v, &nl));
653   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
654   radd = A->rmap->rstart * m;
655   PetscCall(MatDenseGetArrayRead(a->A, &av));
656   PetscCall(MatDenseGetLDA(a->A, &lda));
657   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
658   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
659   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
660   PetscCall(VecRestoreArray(v, &x));
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
665 {
666   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
667 
668   PetscFunctionBegin;
669   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
670   PetscCall(MatStashDestroy_Private(&mat->stash));
671   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
672   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
673   PetscCall(MatDestroy(&mdn->A));
674   PetscCall(VecDestroy(&mdn->lvec));
675   PetscCall(PetscSFDestroy(&mdn->Mvctx));
676   PetscCall(VecDestroy(&mdn->cvec));
677   PetscCall(MatDestroy(&mdn->cmat));
678 
679   PetscCall(PetscFree(mat->data));
680   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
681 
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
694   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
695 #if defined(PETSC_HAVE_ELEMENTAL)
696   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
697 #endif
698 #if defined(PETSC_HAVE_SCALAPACK)
699   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
700 #endif
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
703   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
704 #if defined(PETSC_HAVE_CUDA)
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
722   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
723 #endif
724 #if defined(PETSC_HAVE_HIP)
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
743 #endif
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
757 
758   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 #include <petscdraw.h>
763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
764 {
765   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
766   PetscMPIInt       rank;
767   PetscViewerType   vtype;
768   PetscBool         iascii, isdraw;
769   PetscViewer       sviewer;
770   PetscViewerFormat format;
771 
772   PetscFunctionBegin;
773   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
774   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
775   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
776   if (iascii) {
777     PetscCall(PetscViewerGetType(viewer, &vtype));
778     PetscCall(PetscViewerGetFormat(viewer, &format));
779     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
780       MatInfo info;
781       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
782       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
783       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
784                                                    (PetscInt)info.memory));
785       PetscCall(PetscViewerFlush(viewer));
786       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
787       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
788       PetscFunctionReturn(PETSC_SUCCESS);
789     } else if (format == PETSC_VIEWER_ASCII_INFO) {
790       PetscFunctionReturn(PETSC_SUCCESS);
791     }
792   } else if (isdraw) {
793     PetscDraw draw;
794     PetscBool isnull;
795 
796     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
797     PetscCall(PetscDrawIsNull(draw, &isnull));
798     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
799   }
800 
801   {
802     /* assemble the entire matrix onto first processor. */
803     Mat          A;
804     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
805     PetscInt    *cols;
806     PetscScalar *vals;
807 
808     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
809     if (rank == 0) {
810       PetscCall(MatSetSizes(A, M, N, M, N));
811     } else {
812       PetscCall(MatSetSizes(A, 0, 0, M, N));
813     }
814     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
815     PetscCall(MatSetType(A, MATMPIDENSE));
816     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
817 
818     /* Copy the matrix ... This isn't the most efficient means,
819        but it's quick for now */
820     A->insertmode = INSERT_VALUES;
821 
822     row = mat->rmap->rstart;
823     m   = mdn->A->rmap->n;
824     for (i = 0; i < m; i++) {
825       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
826       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
827       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
828       row++;
829     }
830 
831     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
832     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
833     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
834     if (rank == 0) {
835       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
836       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
837     }
838     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
839     PetscCall(MatDestroy(&A));
840   }
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
845 {
846   PetscBool iascii, isbinary, isdraw, issocket;
847 
848   PetscFunctionBegin;
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
852   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
853 
854   if (iascii || issocket || isdraw) {
855     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
856   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
861 {
862   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
863   Mat            mdn = mat->A;
864   PetscLogDouble isend[5], irecv[5];
865 
866   PetscFunctionBegin;
867   info->block_size = 1.0;
868 
869   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
870 
871   isend[0] = info->nz_used;
872   isend[1] = info->nz_allocated;
873   isend[2] = info->nz_unneeded;
874   isend[3] = info->memory;
875   isend[4] = info->mallocs;
876   if (flag == MAT_LOCAL) {
877     info->nz_used      = isend[0];
878     info->nz_allocated = isend[1];
879     info->nz_unneeded  = isend[2];
880     info->memory       = isend[3];
881     info->mallocs      = isend[4];
882   } else if (flag == MAT_GLOBAL_MAX) {
883     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
884 
885     info->nz_used      = irecv[0];
886     info->nz_allocated = irecv[1];
887     info->nz_unneeded  = irecv[2];
888     info->memory       = irecv[3];
889     info->mallocs      = irecv[4];
890   } else if (flag == MAT_GLOBAL_SUM) {
891     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
892 
893     info->nz_used      = irecv[0];
894     info->nz_allocated = irecv[1];
895     info->nz_unneeded  = irecv[2];
896     info->memory       = irecv[3];
897     info->mallocs      = irecv[4];
898   }
899   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
900   info->fill_ratio_needed = 0;
901   info->factor_mallocs    = 0;
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
905 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
906 {
907   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
908 
909   PetscFunctionBegin;
910   switch (op) {
911   case MAT_NEW_NONZERO_LOCATIONS:
912   case MAT_NEW_NONZERO_LOCATION_ERR:
913   case MAT_NEW_NONZERO_ALLOCATION_ERR:
914     MatCheckPreallocated(A, 1);
915     PetscCall(MatSetOption(a->A, op, flg));
916     break;
917   case MAT_ROW_ORIENTED:
918     MatCheckPreallocated(A, 1);
919     a->roworiented = flg;
920     PetscCall(MatSetOption(a->A, op, flg));
921     break;
922   case MAT_FORCE_DIAGONAL_ENTRIES:
923   case MAT_KEEP_NONZERO_PATTERN:
924   case MAT_USE_HASH_TABLE:
925   case MAT_SORTED_FULL:
926     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
927     break;
928   case MAT_IGNORE_OFF_PROC_ENTRIES:
929     a->donotstash = flg;
930     break;
931   case MAT_SYMMETRIC:
932   case MAT_STRUCTURALLY_SYMMETRIC:
933   case MAT_HERMITIAN:
934   case MAT_SYMMETRY_ETERNAL:
935   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
936   case MAT_SPD:
937   case MAT_IGNORE_LOWER_TRIANGULAR:
938   case MAT_IGNORE_ZERO_ENTRIES:
939   case MAT_SPD_ETERNAL:
940     /* if the diagonal matrix is square it inherits some of the properties above */
941     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
942     break;
943   default:
944     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
945   }
946   PetscFunctionReturn(PETSC_SUCCESS);
947 }
948 
949 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
950 {
951   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
952   const PetscScalar *l;
953   PetscScalar        x, *v, *vv, *r;
954   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
955 
956   PetscFunctionBegin;
957   PetscCall(MatDenseGetArray(mdn->A, &vv));
958   PetscCall(MatDenseGetLDA(mdn->A, &lda));
959   PetscCall(MatGetLocalSize(A, &s2, &s3));
960   if (ll) {
961     PetscCall(VecGetLocalSize(ll, &s2a));
962     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
963     PetscCall(VecGetArrayRead(ll, &l));
964     for (i = 0; i < m; i++) {
965       x = l[i];
966       v = vv + i;
967       for (j = 0; j < n; j++) {
968         (*v) *= x;
969         v += lda;
970       }
971     }
972     PetscCall(VecRestoreArrayRead(ll, &l));
973     PetscCall(PetscLogFlops(1.0 * n * m));
974   }
975   if (rr) {
976     const PetscScalar *ar;
977 
978     PetscCall(VecGetLocalSize(rr, &s3a));
979     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
980     PetscCall(VecGetArrayRead(rr, &ar));
981     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
982     PetscCall(VecGetArray(mdn->lvec, &r));
983     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
984     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
985     PetscCall(VecRestoreArrayRead(rr, &ar));
986     for (i = 0; i < n; i++) {
987       x = r[i];
988       v = vv + i * lda;
989       for (j = 0; j < m; j++) (*v++) *= x;
990     }
991     PetscCall(VecRestoreArray(mdn->lvec, &r));
992     PetscCall(PetscLogFlops(1.0 * n * m));
993   }
994   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
995   PetscFunctionReturn(PETSC_SUCCESS);
996 }
997 
998 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
999 {
1000   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1001   PetscInt           i, j;
1002   PetscMPIInt        size, iN;
1003   PetscReal          sum = 0.0;
1004   const PetscScalar *av, *v;
1005 
1006   PetscFunctionBegin;
1007   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1008   v = av;
1009   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1010   if (size == 1) {
1011     PetscCall(MatNorm(mdn->A, type, nrm));
1012   } else {
1013     if (type == NORM_FROBENIUS) {
1014       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1015         sum += PetscRealPart(PetscConj(*v) * (*v));
1016         v++;
1017       }
1018       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1019       *nrm = PetscSqrtReal(*nrm);
1020       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1021     } else if (type == NORM_1) {
1022       PetscReal *tmp, *tmp2;
1023       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
1024       *nrm = 0.0;
1025       v    = av;
1026       for (j = 0; j < mdn->A->cmap->n; j++) {
1027         for (i = 0; i < mdn->A->rmap->n; i++) {
1028           tmp[j] += PetscAbsScalar(*v);
1029           v++;
1030         }
1031       }
1032       PetscCall(PetscMPIIntCast(A->cmap->N, &iN));
1033       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1034       for (j = 0; j < A->cmap->N; j++) {
1035         if (tmp2[j] > *nrm) *nrm = tmp2[j];
1036       }
1037       PetscCall(PetscFree2(tmp, tmp2));
1038       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1039     } else if (type == NORM_INFINITY) { /* max row norm */
1040       PetscReal ntemp;
1041       PetscCall(MatNorm(mdn->A, type, &ntemp));
1042       PetscCallMPI(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1043     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1044   }
1045   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1046   PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048 
1049 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1050 {
1051   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1052   Mat           B;
1053   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1054   PetscInt      j, i, lda;
1055   PetscScalar  *v;
1056 
1057   PetscFunctionBegin;
1058   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1059   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1060     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1061     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1062     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1063     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1064   } else B = *matout;
1065 
1066   m = a->A->rmap->n;
1067   n = a->A->cmap->n;
1068   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
1069   PetscCall(MatDenseGetLDA(a->A, &lda));
1070   PetscCall(PetscMalloc1(m, &rwork));
1071   for (i = 0; i < m; i++) rwork[i] = rstart + i;
1072   for (j = 0; j < n; j++) {
1073     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
1074     v = PetscSafePointerPlusOffset(v, lda);
1075   }
1076   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
1077   PetscCall(PetscFree(rwork));
1078   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1079   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1080   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1081     *matout = B;
1082   } else {
1083     PetscCall(MatHeaderMerge(A, &B));
1084   }
1085   PetscFunctionReturn(PETSC_SUCCESS);
1086 }
1087 
1088 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1089 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1090 
1091 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1092 {
1093   PetscFunctionBegin;
1094   PetscCall(PetscLayoutSetUp(A->rmap));
1095   PetscCall(PetscLayoutSetUp(A->cmap));
1096   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1097   PetscFunctionReturn(PETSC_SUCCESS);
1098 }
1099 
1100 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1101 {
1102   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1106   PetscFunctionReturn(PETSC_SUCCESS);
1107 }
1108 
1109 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1110 {
1111   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(MatConjugate(a->A));
1115   PetscFunctionReturn(PETSC_SUCCESS);
1116 }
1117 
1118 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1119 {
1120   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1121 
1122   PetscFunctionBegin;
1123   PetscCall(MatRealPart(a->A));
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1128 {
1129   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1130 
1131   PetscFunctionBegin;
1132   PetscCall(MatImaginaryPart(a->A));
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135 
1136 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1137 {
1138   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1139 
1140   PetscFunctionBegin;
1141   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1142   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1143   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1148 
1149 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1150 {
1151   PetscInt      i, m, n;
1152   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1153   PetscReal    *work;
1154   PetscMPIInt   in;
1155 
1156   PetscFunctionBegin;
1157   PetscCall(MatGetSize(A, &m, &n));
1158   PetscCall(PetscMalloc1(n, &work));
1159   if (type == REDUCTION_MEAN_REALPART) {
1160     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1161   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1162     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1163   } else {
1164     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1165   }
1166   if (type == NORM_2) {
1167     for (i = 0; i < n; i++) work[i] *= work[i];
1168   }
1169   PetscCall(PetscMPIIntCast(n, &in));
1170   if (type == NORM_INFINITY) {
1171     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1172   } else {
1173     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1174   }
1175   PetscCall(PetscFree(work));
1176   if (type == NORM_2) {
1177     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1178   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1179     for (i = 0; i < n; i++) reductions[i] /= m;
1180   }
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1185 {
1186   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1187 
1188   PetscFunctionBegin;
1189   PetscCall(MatSetRandom(d->A, rctx));
1190 #if defined(PETSC_HAVE_DEVICE)
1191   x->offloadmask = d->A->offloadmask;
1192 #endif
1193   PetscFunctionReturn(PETSC_SUCCESS);
1194 }
1195 
1196 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1197 {
1198   PetscFunctionBegin;
1199   *missing = PETSC_FALSE;
1200   PetscFunctionReturn(PETSC_SUCCESS);
1201 }
1202 
1203 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1204 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1205 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1206 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1207 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1208 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1209 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1210 
1211 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1212                                        MatGetRow_MPIDense,
1213                                        MatRestoreRow_MPIDense,
1214                                        MatMult_MPIDense,
1215                                        /*  4*/ MatMultAdd_MPIDense,
1216                                        MatMultTranspose_MPIDense,
1217                                        MatMultTransposeAdd_MPIDense,
1218                                        NULL,
1219                                        NULL,
1220                                        NULL,
1221                                        /* 10*/ NULL,
1222                                        NULL,
1223                                        NULL,
1224                                        NULL,
1225                                        MatTranspose_MPIDense,
1226                                        /* 15*/ MatGetInfo_MPIDense,
1227                                        MatEqual_MPIDense,
1228                                        MatGetDiagonal_MPIDense,
1229                                        MatDiagonalScale_MPIDense,
1230                                        MatNorm_MPIDense,
1231                                        /* 20*/ MatAssemblyBegin_MPIDense,
1232                                        MatAssemblyEnd_MPIDense,
1233                                        MatSetOption_MPIDense,
1234                                        MatZeroEntries_MPIDense,
1235                                        /* 24*/ MatZeroRows_MPIDense,
1236                                        NULL,
1237                                        NULL,
1238                                        NULL,
1239                                        NULL,
1240                                        /* 29*/ MatSetUp_MPIDense,
1241                                        NULL,
1242                                        NULL,
1243                                        MatGetDiagonalBlock_MPIDense,
1244                                        NULL,
1245                                        /* 34*/ MatDuplicate_MPIDense,
1246                                        NULL,
1247                                        NULL,
1248                                        NULL,
1249                                        NULL,
1250                                        /* 39*/ MatAXPY_MPIDense,
1251                                        MatCreateSubMatrices_MPIDense,
1252                                        NULL,
1253                                        MatGetValues_MPIDense,
1254                                        MatCopy_MPIDense,
1255                                        /* 44*/ NULL,
1256                                        MatScale_MPIDense,
1257                                        MatShift_MPIDense,
1258                                        NULL,
1259                                        NULL,
1260                                        /* 49*/ MatSetRandom_MPIDense,
1261                                        NULL,
1262                                        NULL,
1263                                        NULL,
1264                                        NULL,
1265                                        /* 54*/ NULL,
1266                                        NULL,
1267                                        NULL,
1268                                        NULL,
1269                                        NULL,
1270                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1271                                        MatDestroy_MPIDense,
1272                                        MatView_MPIDense,
1273                                        NULL,
1274                                        NULL,
1275                                        /* 64*/ NULL,
1276                                        NULL,
1277                                        NULL,
1278                                        NULL,
1279                                        NULL,
1280                                        /* 69*/ NULL,
1281                                        NULL,
1282                                        NULL,
1283                                        NULL,
1284                                        NULL,
1285                                        /* 74*/ NULL,
1286                                        NULL,
1287                                        NULL,
1288                                        NULL,
1289                                        NULL,
1290                                        /* 79*/ NULL,
1291                                        NULL,
1292                                        NULL,
1293                                        NULL,
1294                                        /* 83*/ MatLoad_MPIDense,
1295                                        NULL,
1296                                        NULL,
1297                                        NULL,
1298                                        NULL,
1299                                        NULL,
1300                                        /* 89*/ NULL,
1301                                        NULL,
1302                                        NULL,
1303                                        NULL,
1304                                        NULL,
1305                                        /* 94*/ NULL,
1306                                        NULL,
1307                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1308                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1309                                        NULL,
1310                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1311                                        NULL,
1312                                        NULL,
1313                                        MatConjugate_MPIDense,
1314                                        NULL,
1315                                        /*104*/ NULL,
1316                                        MatRealPart_MPIDense,
1317                                        MatImaginaryPart_MPIDense,
1318                                        NULL,
1319                                        NULL,
1320                                        /*109*/ NULL,
1321                                        NULL,
1322                                        NULL,
1323                                        MatGetColumnVector_MPIDense,
1324                                        MatMissingDiagonal_MPIDense,
1325                                        /*114*/ NULL,
1326                                        NULL,
1327                                        NULL,
1328                                        NULL,
1329                                        NULL,
1330                                        /*119*/ NULL,
1331                                        NULL,
1332                                        MatMultHermitianTranspose_MPIDense,
1333                                        MatMultHermitianTransposeAdd_MPIDense,
1334                                        NULL,
1335                                        /*124*/ NULL,
1336                                        MatGetColumnReductions_MPIDense,
1337                                        NULL,
1338                                        NULL,
1339                                        NULL,
1340                                        /*129*/ NULL,
1341                                        NULL,
1342                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1343                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1344                                        NULL,
1345                                        /*134*/ NULL,
1346                                        NULL,
1347                                        NULL,
1348                                        NULL,
1349                                        NULL,
1350                                        /*139*/ NULL,
1351                                        NULL,
1352                                        NULL,
1353                                        NULL,
1354                                        NULL,
1355                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1356                                        /*145*/ NULL,
1357                                        NULL,
1358                                        NULL,
1359                                        NULL,
1360                                        NULL,
1361                                        /*150*/ NULL,
1362                                        NULL,
1363                                        NULL,
1364                                        NULL,
1365                                        NULL,
1366                                        NULL};
1367 
1368 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1369 {
1370   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1371   MatType       mtype = MATSEQDENSE;
1372 
1373   PetscFunctionBegin;
1374   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1375   PetscCall(PetscLayoutSetUp(mat->rmap));
1376   PetscCall(PetscLayoutSetUp(mat->cmap));
1377   if (!a->A) {
1378     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1379     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1380   }
1381 #if defined(PETSC_HAVE_CUDA)
1382   PetscBool iscuda;
1383   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1384   if (iscuda) mtype = MATSEQDENSECUDA;
1385 #endif
1386 #if defined(PETSC_HAVE_HIP)
1387   PetscBool iship;
1388   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1389   if (iship) mtype = MATSEQDENSEHIP;
1390 #endif
1391   PetscCall(MatSetType(a->A, mtype));
1392   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1393 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1394   mat->offloadmask = a->A->offloadmask;
1395 #endif
1396   mat->preallocated = PETSC_TRUE;
1397   mat->assembled    = PETSC_TRUE;
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1402 {
1403   Mat B, C;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1407   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1408   PetscCall(MatDestroy(&C));
1409   if (reuse == MAT_REUSE_MATRIX) {
1410     C = *newmat;
1411   } else C = NULL;
1412   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1413   PetscCall(MatDestroy(&B));
1414   if (reuse == MAT_INPLACE_MATRIX) {
1415     PetscCall(MatHeaderReplace(A, &C));
1416   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1417   PetscFunctionReturn(PETSC_SUCCESS);
1418 }
1419 
1420 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1421 {
1422   Mat B, C;
1423 
1424   PetscFunctionBegin;
1425   PetscCall(MatDenseGetLocalMatrix(A, &C));
1426   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1427   if (reuse == MAT_REUSE_MATRIX) {
1428     C = *newmat;
1429   } else C = NULL;
1430   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1431   PetscCall(MatDestroy(&B));
1432   if (reuse == MAT_INPLACE_MATRIX) {
1433     PetscCall(MatHeaderReplace(A, &C));
1434   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1435   PetscFunctionReturn(PETSC_SUCCESS);
1436 }
1437 
1438 #if defined(PETSC_HAVE_ELEMENTAL)
1439 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1440 {
1441   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1442   Mat           mat_elemental;
1443   PetscScalar  *v;
1444   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1445 
1446   PetscFunctionBegin;
1447   if (reuse == MAT_REUSE_MATRIX) {
1448     mat_elemental = *newmat;
1449     PetscCall(MatZeroEntries(*newmat));
1450   } else {
1451     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1452     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1453     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1454     PetscCall(MatSetUp(mat_elemental));
1455     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1456   }
1457 
1458   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1459   for (i = 0; i < N; i++) cols[i] = i;
1460   for (i = 0; i < m; i++) rows[i] = rstart + i;
1461 
1462   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1463   PetscCall(MatDenseGetArray(A, &v));
1464   PetscCall(MatDenseGetLDA(a->A, &lda));
1465   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1466   else {
1467     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1468   }
1469   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1470   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1471   PetscCall(MatDenseRestoreArray(A, &v));
1472   PetscCall(PetscFree2(rows, cols));
1473 
1474   if (reuse == MAT_INPLACE_MATRIX) {
1475     PetscCall(MatHeaderReplace(A, &mat_elemental));
1476   } else {
1477     *newmat = mat_elemental;
1478   }
1479   PetscFunctionReturn(PETSC_SUCCESS);
1480 }
1481 #endif
1482 
1483 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1484 {
1485   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1486 
1487   PetscFunctionBegin;
1488   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1489   PetscFunctionReturn(PETSC_SUCCESS);
1490 }
1491 
1492 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1493 {
1494   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1495 
1496   PetscFunctionBegin;
1497   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1498   PetscFunctionReturn(PETSC_SUCCESS);
1499 }
1500 
1501 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1502 {
1503   Mat_MPIDense *mat;
1504   PetscInt      m, nloc, N;
1505 
1506   PetscFunctionBegin;
1507   PetscCall(MatGetSize(inmat, &m, &N));
1508   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1509   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1510     PetscInt sum;
1511 
1512     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1513     /* Check sum(n) = N */
1514     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1515     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1516 
1517     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1518     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1519   }
1520 
1521   /* numeric phase */
1522   mat = (Mat_MPIDense *)(*outmat)->data;
1523   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1524   PetscFunctionReturn(PETSC_SUCCESS);
1525 }
1526 
1527 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1528 {
1529   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1530   PetscInt      lda;
1531 
1532   PetscFunctionBegin;
1533   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1534   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1535   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1536   a->vecinuse = col + 1;
1537   PetscCall(MatDenseGetLDA(a->A, &lda));
1538   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1539   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1540   *v = a->cvec;
1541   PetscFunctionReturn(PETSC_SUCCESS);
1542 }
1543 
1544 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1545 {
1546   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1547 
1548   PetscFunctionBegin;
1549   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1550   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1551   VecCheckAssembled(a->cvec);
1552   a->vecinuse = 0;
1553   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1554   PetscCall(VecResetArray(a->cvec));
1555   if (v) *v = NULL;
1556   PetscFunctionReturn(PETSC_SUCCESS);
1557 }
1558 
1559 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1560 {
1561   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1562   PetscInt      lda;
1563 
1564   PetscFunctionBegin;
1565   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1566   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1567   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1568   a->vecinuse = col + 1;
1569   PetscCall(MatDenseGetLDA(a->A, &lda));
1570   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1571   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1572   PetscCall(VecLockReadPush(a->cvec));
1573   *v = a->cvec;
1574   PetscFunctionReturn(PETSC_SUCCESS);
1575 }
1576 
1577 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1578 {
1579   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1580 
1581   PetscFunctionBegin;
1582   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1583   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1584   VecCheckAssembled(a->cvec);
1585   a->vecinuse = 0;
1586   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1587   PetscCall(VecLockReadPop(a->cvec));
1588   PetscCall(VecResetArray(a->cvec));
1589   if (v) *v = NULL;
1590   PetscFunctionReturn(PETSC_SUCCESS);
1591 }
1592 
1593 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1594 {
1595   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1596   PetscInt      lda;
1597 
1598   PetscFunctionBegin;
1599   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1600   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1601   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1602   a->vecinuse = col + 1;
1603   PetscCall(MatDenseGetLDA(a->A, &lda));
1604   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1605   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1606   *v = a->cvec;
1607   PetscFunctionReturn(PETSC_SUCCESS);
1608 }
1609 
1610 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1611 {
1612   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1613 
1614   PetscFunctionBegin;
1615   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1616   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1617   VecCheckAssembled(a->cvec);
1618   a->vecinuse = 0;
1619   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1620   PetscCall(VecResetArray(a->cvec));
1621   if (v) *v = NULL;
1622   PetscFunctionReturn(PETSC_SUCCESS);
1623 }
1624 
1625 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1626 {
1627   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1628   Mat_MPIDense *c;
1629   MPI_Comm      comm;
1630   PetscInt      pbegin, pend;
1631 
1632   PetscFunctionBegin;
1633   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1634   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1635   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1636   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1637   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1638   if (!a->cmat) {
1639     PetscCall(MatCreate(comm, &a->cmat));
1640     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1641     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1642     else {
1643       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1644       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1645       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1646     }
1647     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1648     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1649   } else {
1650     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1651     if (same && a->cmat->rmap->N != A->rmap->N) {
1652       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1653       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1654     }
1655     if (!same) {
1656       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1657       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1658       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1659       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1660       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1661     }
1662     if (cend - cbegin != a->cmat->cmap->N) {
1663       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1664       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1665       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1666       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1667     }
1668   }
1669   c = (Mat_MPIDense *)a->cmat->data;
1670   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1671   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1672 
1673   a->cmat->preallocated = PETSC_TRUE;
1674   a->cmat->assembled    = PETSC_TRUE;
1675 #if defined(PETSC_HAVE_DEVICE)
1676   a->cmat->offloadmask = c->A->offloadmask;
1677 #endif
1678   a->matinuse = cbegin + 1;
1679   *v          = a->cmat;
1680   PetscFunctionReturn(PETSC_SUCCESS);
1681 }
1682 
1683 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1684 {
1685   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1686   Mat_MPIDense *c;
1687 
1688   PetscFunctionBegin;
1689   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1690   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1691   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1692   a->matinuse = 0;
1693   c           = (Mat_MPIDense *)a->cmat->data;
1694   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1695   if (v) *v = NULL;
1696 #if defined(PETSC_HAVE_DEVICE)
1697   A->offloadmask = a->A->offloadmask;
1698 #endif
1699   PetscFunctionReturn(PETSC_SUCCESS);
1700 }
1701 
1702 /*MC
1703    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1704 
1705    Options Database Key:
1706 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1707 
1708   Level: beginner
1709 
1710 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1711 M*/
1712 PetscErrorCode MatCreate_MPIDense(Mat mat)
1713 {
1714   Mat_MPIDense *a;
1715 
1716   PetscFunctionBegin;
1717   PetscCall(PetscNew(&a));
1718   mat->data   = (void *)a;
1719   mat->ops[0] = MatOps_Values;
1720 
1721   mat->insertmode = NOT_SET_VALUES;
1722 
1723   /* build cache for off array entries formed */
1724   a->donotstash = PETSC_FALSE;
1725 
1726   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1727 
1728   /* stuff used for matrix vector multiply */
1729   a->lvec        = NULL;
1730   a->Mvctx       = NULL;
1731   a->roworiented = PETSC_TRUE;
1732 
1733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1743   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1754 #if defined(PETSC_HAVE_ELEMENTAL)
1755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1756 #endif
1757 #if defined(PETSC_HAVE_SCALAPACK)
1758   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1759 #endif
1760 #if defined(PETSC_HAVE_CUDA)
1761   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1762 #endif
1763   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1764   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1765   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1766 #if defined(PETSC_HAVE_CUDA)
1767   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1768   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1769 #endif
1770 #if defined(PETSC_HAVE_HIP)
1771   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1772   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1773   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1774 #endif
1775   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1776   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1777   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1778   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1779   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
1780   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1781   PetscFunctionReturn(PETSC_SUCCESS);
1782 }
1783 
1784 /*MC
1785    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1786 
1787    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1788    and `MATMPIDENSE` otherwise.
1789 
1790    Options Database Key:
1791 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1792 
1793   Level: beginner
1794 
1795 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1796 M*/
1797 
1798 /*@
1799   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1800 
1801   Collective
1802 
1803   Input Parameters:
1804 + B    - the matrix
1805 - data - optional location of matrix data.  Set to `NULL` for PETSc
1806          to control all matrix memory allocation.
1807 
1808   Level: intermediate
1809 
1810   Notes:
1811   The dense format is fully compatible with standard Fortran
1812   storage by columns.
1813 
1814   The data input variable is intended primarily for Fortran programmers
1815   who wish to allocate their own matrix memory space.  Most users should
1816   set `data` to `NULL`.
1817 
1818 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1819 @*/
1820 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1821 {
1822   PetscFunctionBegin;
1823   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1824   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1825   PetscFunctionReturn(PETSC_SUCCESS);
1826 }
1827 
1828 /*@
1829   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1830   array provided by the user. This is useful to avoid copying an array
1831   into a matrix
1832 
1833   Not Collective
1834 
1835   Input Parameters:
1836 + mat   - the matrix
1837 - array - the array in column major order
1838 
1839   Level: developer
1840 
1841   Note:
1842   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1843   freed when the matrix is destroyed.
1844 
1845 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1846           `MatDenseReplaceArray()`
1847 @*/
1848 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1849 {
1850   PetscFunctionBegin;
1851   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1852   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1853   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1854 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1855   mat->offloadmask = PETSC_OFFLOAD_CPU;
1856 #endif
1857   PetscFunctionReturn(PETSC_SUCCESS);
1858 }
1859 
1860 /*@
1861   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1862 
1863   Not Collective
1864 
1865   Input Parameter:
1866 . mat - the matrix
1867 
1868   Level: developer
1869 
1870   Note:
1871   You can only call this after a call to `MatDensePlaceArray()`
1872 
1873 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1874 @*/
1875 PetscErrorCode MatDenseResetArray(Mat mat)
1876 {
1877   PetscFunctionBegin;
1878   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1879   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1880   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
1884 /*@
1885   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1886   array provided by the user. This is useful to avoid copying an array
1887   into a matrix
1888 
1889   Not Collective
1890 
1891   Input Parameters:
1892 + mat   - the matrix
1893 - array - the array in column major order
1894 
1895   Level: developer
1896 
1897   Note:
1898   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1899   freed by the user. It will be freed when the matrix is destroyed.
1900 
1901 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1902 @*/
1903 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1904 {
1905   PetscFunctionBegin;
1906   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1907   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1908   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1909 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1910   mat->offloadmask = PETSC_OFFLOAD_CPU;
1911 #endif
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 /*@
1916   MatCreateDense - Creates a matrix in `MATDENSE` format.
1917 
1918   Collective
1919 
1920   Input Parameters:
1921 + comm - MPI communicator
1922 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1923 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1924 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1925 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1926 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1927    to control all matrix memory allocation.
1928 
1929   Output Parameter:
1930 . A - the matrix
1931 
1932   Level: intermediate
1933 
1934   Notes:
1935   The dense format is fully compatible with standard Fortran
1936   storage by columns.
1937 
1938   Although local portions of the matrix are stored in column-major
1939   order, the matrix is partitioned across MPI ranks by row.
1940 
1941   The data input variable is intended primarily for Fortran programmers
1942   who wish to allocate their own matrix memory space.  Most users should
1943   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1944 
1945   The user MUST specify either the local or global matrix dimensions
1946   (possibly both).
1947 
1948 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1949 @*/
1950 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1951 {
1952   PetscFunctionBegin;
1953   PetscCall(MatCreate(comm, A));
1954   PetscCall(MatSetSizes(*A, m, n, M, N));
1955   PetscCall(MatSetType(*A, MATDENSE));
1956   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1957   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1958   PetscFunctionReturn(PETSC_SUCCESS);
1959 }
1960 
1961 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1962 {
1963   Mat           mat;
1964   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1965 
1966   PetscFunctionBegin;
1967   *newmat = NULL;
1968   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1969   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1970   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1971   a = (Mat_MPIDense *)mat->data;
1972 
1973   mat->factortype   = A->factortype;
1974   mat->assembled    = PETSC_TRUE;
1975   mat->preallocated = PETSC_TRUE;
1976 
1977   mat->insertmode = NOT_SET_VALUES;
1978   a->donotstash   = oldmat->donotstash;
1979 
1980   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1981   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1982 
1983   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1984 
1985   *newmat = mat;
1986   PetscFunctionReturn(PETSC_SUCCESS);
1987 }
1988 
1989 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1990 {
1991   PetscBool isbinary;
1992 #if defined(PETSC_HAVE_HDF5)
1993   PetscBool ishdf5;
1994 #endif
1995 
1996   PetscFunctionBegin;
1997   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1998   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1999   /* force binary viewer to load .info file if it has not yet done so */
2000   PetscCall(PetscViewerSetUp(viewer));
2001   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2002 #if defined(PETSC_HAVE_HDF5)
2003   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2004 #endif
2005   if (isbinary) {
2006     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2007 #if defined(PETSC_HAVE_HDF5)
2008   } else if (ishdf5) {
2009     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2010 #endif
2011   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2012   PetscFunctionReturn(PETSC_SUCCESS);
2013 }
2014 
2015 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2016 {
2017   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2018   Mat           a, b;
2019 
2020   PetscFunctionBegin;
2021   a = matA->A;
2022   b = matB->A;
2023   PetscCall(MatEqual(a, b, flag));
2024   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
2028 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2029 {
2030   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2031 
2032   PetscFunctionBegin;
2033   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2034   PetscCall(MatDestroy(&atb->atb));
2035   PetscCall(PetscFree(atb));
2036   PetscFunctionReturn(PETSC_SUCCESS);
2037 }
2038 
2039 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2040 {
2041   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2042 
2043   PetscFunctionBegin;
2044   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2045   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2046   PetscCall(PetscFree(abt));
2047   PetscFunctionReturn(PETSC_SUCCESS);
2048 }
2049 
2050 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2051 {
2052   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2053   Mat_TransMatMultDense *atb;
2054   MPI_Comm               comm;
2055   PetscMPIInt            size, *recvcounts;
2056   PetscScalar           *carray, *sendbuf;
2057   const PetscScalar     *atbarray;
2058   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2059   const PetscInt        *ranges;
2060 
2061   PetscFunctionBegin;
2062   MatCheckProduct(C, 3);
2063   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2064   atb        = (Mat_TransMatMultDense *)C->product->data;
2065   recvcounts = atb->recvcounts;
2066   sendbuf    = atb->sendbuf;
2067 
2068   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2069   PetscCallMPI(MPI_Comm_size(comm, &size));
2070 
2071   /* compute atbarray = aseq^T * bseq */
2072   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));
2073 
2074   PetscCall(MatGetOwnershipRanges(C, &ranges));
2075 
2076   /* arrange atbarray into sendbuf */
2077   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
2078   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2079   for (proc = 0, k = 0; proc < size; proc++) {
2080     for (j = 0; j < cN; j++) {
2081       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2082     }
2083   }
2084   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2085 
2086   /* sum all atbarray to local values of C */
2087   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
2088   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
2089   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
2090   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2091   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2092   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2093   PetscFunctionReturn(PETSC_SUCCESS);
2094 }
2095 
2096 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2097 {
2098   MPI_Comm               comm;
2099   PetscMPIInt            size;
2100   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2101   Mat_TransMatMultDense *atb;
2102   PetscBool              cisdense = PETSC_FALSE;
2103   const PetscInt        *ranges;
2104 
2105   PetscFunctionBegin;
2106   MatCheckProduct(C, 4);
2107   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2108   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2109   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2110     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2111   }
2112 
2113   /* create matrix product C */
2114   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2115 #if defined(PETSC_HAVE_CUDA)
2116   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2117 #endif
2118 #if defined(PETSC_HAVE_HIP)
2119   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2120 #endif
2121   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2122   PetscCall(MatSetUp(C));
2123 
2124   /* create data structure for reuse C */
2125   PetscCallMPI(MPI_Comm_size(comm, &size));
2126   PetscCall(PetscNew(&atb));
2127   cM = C->rmap->N;
2128   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2129   PetscCall(MatGetOwnershipRanges(C, &ranges));
2130   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2131   C->product->data    = atb;
2132   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
2136 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2137 {
2138   MPI_Comm               comm;
2139   PetscMPIInt            i, size;
2140   PetscInt               maxRows, bufsiz;
2141   PetscMPIInt            tag;
2142   PetscInt               alg;
2143   Mat_MatTransMultDense *abt;
2144   Mat_Product           *product = C->product;
2145   PetscBool              flg;
2146 
2147   PetscFunctionBegin;
2148   MatCheckProduct(C, 4);
2149   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2150   /* check local size of A and B */
2151   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2152 
2153   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2154   alg = flg ? 0 : 1;
2155 
2156   /* setup matrix product C */
2157   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2158   PetscCall(MatSetType(C, MATMPIDENSE));
2159   PetscCall(MatSetUp(C));
2160   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2161 
2162   /* create data structure for reuse C */
2163   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2164   PetscCallMPI(MPI_Comm_size(comm, &size));
2165   PetscCall(PetscNew(&abt));
2166   abt->tag = tag;
2167   abt->alg = alg;
2168   switch (alg) {
2169   case 1: /* alg: "cyclic" */
2170     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2171     bufsiz = A->cmap->N * maxRows;
2172     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2173     break;
2174   default: /* alg: "allgatherv" */
2175     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2176     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2177     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2178     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2179     break;
2180   }
2181 
2182   C->product->data                = abt;
2183   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2184   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2185   PetscFunctionReturn(PETSC_SUCCESS);
2186 }
2187 
2188 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2189 {
2190   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2191   Mat_MatTransMultDense *abt;
2192   MPI_Comm               comm;
2193   PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
2194   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2195   PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
2196   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2197   const PetscScalar     *av, *bv;
2198   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2199   MPI_Request            reqs[2];
2200   const PetscInt        *ranges;
2201 
2202   PetscFunctionBegin;
2203   MatCheckProduct(C, 3);
2204   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2205   abt = (Mat_MatTransMultDense *)C->product->data;
2206   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2207   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2208   PetscCallMPI(MPI_Comm_size(comm, &size));
2209   PetscCall(MatDenseGetArrayRead(a->A, &av));
2210   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2211   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2212   PetscCall(MatDenseGetLDA(a->A, &i));
2213   PetscCall(PetscBLASIntCast(i, &alda));
2214   PetscCall(MatDenseGetLDA(b->A, &i));
2215   PetscCall(PetscBLASIntCast(i, &blda));
2216   PetscCall(MatDenseGetLDA(c->A, &i));
2217   PetscCall(PetscBLASIntCast(i, &clda));
2218   PetscCall(MatGetOwnershipRanges(B, &ranges));
2219   bn = B->rmap->n;
2220   if (blda == bn) {
2221     sendbuf = (PetscScalar *)bv;
2222   } else {
2223     sendbuf = abt->buf[0];
2224     for (k = 0, i = 0; i < cK; i++) {
2225       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2226     }
2227   }
2228   if (size > 1) {
2229     sendto   = (rank + size - 1) % size;
2230     recvfrom = (rank + size + 1) % size;
2231   } else {
2232     sendto = recvfrom = 0;
2233   }
2234   PetscCall(PetscBLASIntCast(cK, &ck));
2235   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2236   recvisfrom = rank;
2237   for (i = 0; i < size; i++) {
2238     /* we have finished receiving in sending, bufs can be read/modified */
2239     PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2240     PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2241 
2242     if (nextrecvisfrom != rank) {
2243       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2244       sendsiz = cK * bn;
2245       recvsiz = cK * nextbn;
2246       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2247       PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2248       PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2249     }
2250 
2251     /* local aseq * sendbuf^T */
2252     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2253     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2254 
2255     if (nextrecvisfrom != rank) {
2256       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2257       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2258     }
2259     bn         = nextbn;
2260     recvisfrom = nextrecvisfrom;
2261     sendbuf    = recvbuf;
2262   }
2263   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2264   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2265   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2266   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2267   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2268   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2269   PetscFunctionReturn(PETSC_SUCCESS);
2270 }
2271 
2272 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2273 {
2274   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2275   Mat_MatTransMultDense *abt;
2276   MPI_Comm               comm;
2277   PetscMPIInt            size, ibn;
2278   PetscScalar           *cv, *sendbuf, *recvbuf;
2279   const PetscScalar     *av, *bv;
2280   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2281   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2282   PetscBLASInt           cm, cn, ck, alda, clda;
2283 
2284   PetscFunctionBegin;
2285   MatCheckProduct(C, 3);
2286   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2287   abt = (Mat_MatTransMultDense *)C->product->data;
2288   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2289   PetscCallMPI(MPI_Comm_size(comm, &size));
2290   PetscCall(MatDenseGetArrayRead(a->A, &av));
2291   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2292   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2293   PetscCall(MatDenseGetLDA(a->A, &i));
2294   PetscCall(PetscBLASIntCast(i, &alda));
2295   PetscCall(MatDenseGetLDA(b->A, &blda));
2296   PetscCall(MatDenseGetLDA(c->A, &i));
2297   PetscCall(PetscBLASIntCast(i, &clda));
2298   /* copy transpose of B into buf[0] */
2299   bn      = B->rmap->n;
2300   sendbuf = abt->buf[0];
2301   recvbuf = abt->buf[1];
2302   for (k = 0, j = 0; j < bn; j++) {
2303     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2304   }
2305   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2306   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2307   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2308   PetscCall(PetscBLASIntCast(cK, &ck));
2309   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2310   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2311   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2312   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2313   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2314   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2315   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2316   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2317   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2318   PetscFunctionReturn(PETSC_SUCCESS);
2319 }
2320 
2321 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2322 {
2323   Mat_MatTransMultDense *abt;
2324 
2325   PetscFunctionBegin;
2326   MatCheckProduct(C, 3);
2327   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2328   abt = (Mat_MatTransMultDense *)C->product->data;
2329   switch (abt->alg) {
2330   case 1:
2331     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2332     break;
2333   default:
2334     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2335     break;
2336   }
2337   PetscFunctionReturn(PETSC_SUCCESS);
2338 }
2339 
2340 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2341 {
2342   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2343 
2344   PetscFunctionBegin;
2345   PetscCall(MatDestroy(&ab->Ce));
2346   PetscCall(MatDestroy(&ab->Ae));
2347   PetscCall(MatDestroy(&ab->Be));
2348   PetscCall(PetscFree(ab));
2349   PetscFunctionReturn(PETSC_SUCCESS);
2350 }
2351 
2352 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2353 {
2354   Mat_MatMultDense *ab;
2355   Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
2356 
2357   PetscFunctionBegin;
2358   MatCheckProduct(C, 3);
2359   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2360   ab = (Mat_MatMultDense *)C->product->data;
2361   if (ab->Ae && ab->Ce) {
2362 #if PetscDefined(HAVE_ELEMENTAL)
2363     PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2364     PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2365     PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2366     PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2367 #else
2368     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2369 #endif
2370   } else {
2371     const PetscScalar *read;
2372     PetscScalar       *write;
2373     PetscInt           lda;
2374 
2375     PetscCall(MatDenseGetLDA(B, &lda));
2376     PetscCall(MatDenseGetArrayRead(B, &read));
2377     PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
2378     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
2379     for (PetscInt i = 0; i < C->cmap->N; ++i) {
2380       PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2381       PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2382     }
2383     PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
2384     PetscCall(MatDenseRestoreArrayRead(B, &read));
2385     PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
2386   }
2387   PetscFunctionReturn(PETSC_SUCCESS);
2388 }
2389 
2390 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2391 {
2392   Mat_Product      *product = C->product;
2393   PetscInt          alg;
2394   Mat_MatMultDense *ab;
2395   PetscBool         flg;
2396 
2397   PetscFunctionBegin;
2398   MatCheckProduct(C, 4);
2399   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2400   /* check local size of A and B */
2401   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2402              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2403 
2404   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2405   alg = flg ? 0 : 1;
2406 
2407   /* setup C */
2408   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2409   PetscCall(MatSetType(C, MATMPIDENSE));
2410   PetscCall(MatSetUp(C));
2411 
2412   /* create data structure for reuse Cdense */
2413   PetscCall(PetscNew(&ab));
2414 
2415   switch (alg) {
2416   case 1: /* alg: "elemental" */
2417 #if PetscDefined(HAVE_ELEMENTAL)
2418     /* create elemental matrices Ae and Be */
2419     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2420     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2421     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2422     PetscCall(MatSetUp(ab->Ae));
2423     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2424 
2425     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2426     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2427     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2428     PetscCall(MatSetUp(ab->Be));
2429     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2430 
2431     /* compute symbolic Ce = Ae*Be */
2432     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2433     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2434 #else
2435     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2436 #endif
2437     break;
2438   default: /* alg: "petsc" */
2439     ab->Ae = NULL;
2440     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2441     ab->Ce = NULL;
2442     break;
2443   }
2444 
2445   C->product->data       = ab;
2446   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2447   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2448   PetscFunctionReturn(PETSC_SUCCESS);
2449 }
2450 
2451 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2452 {
2453   Mat_Product *product     = C->product;
2454   const char  *algTypes[2] = {"petsc", "elemental"};
2455   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2456   PetscBool    flg = PETSC_FALSE;
2457 
2458   PetscFunctionBegin;
2459   /* Set default algorithm */
2460   alg = 0; /* default is petsc */
2461   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2462   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2463 
2464   /* Get runtime option */
2465   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2466   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2467   PetscOptionsEnd();
2468   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2469 
2470   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2471   C->ops->productsymbolic = MatProductSymbolic_AB;
2472   PetscFunctionReturn(PETSC_SUCCESS);
2473 }
2474 
2475 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2476 {
2477   Mat_Product *product = C->product;
2478   Mat          A = product->A, B = product->B;
2479 
2480   PetscFunctionBegin;
2481   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2482              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2483   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2484   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2485   PetscFunctionReturn(PETSC_SUCCESS);
2486 }
2487 
2488 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2489 {
2490   Mat_Product *product     = C->product;
2491   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2492   PetscInt     alg, nalg = 2;
2493   PetscBool    flg = PETSC_FALSE;
2494 
2495   PetscFunctionBegin;
2496   /* Set default algorithm */
2497   alg = 0; /* default is allgatherv */
2498   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2499   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2500 
2501   /* Get runtime option */
2502   if (product->api_user) {
2503     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2504     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2505     PetscOptionsEnd();
2506   } else {
2507     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2508     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2509     PetscOptionsEnd();
2510   }
2511   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2512 
2513   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2514   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2515   PetscFunctionReturn(PETSC_SUCCESS);
2516 }
2517 
2518 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2519 {
2520   Mat_Product *product = C->product;
2521 
2522   PetscFunctionBegin;
2523   switch (product->type) {
2524   case MATPRODUCT_AB:
2525     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2526     break;
2527   case MATPRODUCT_AtB:
2528     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2529     break;
2530   case MATPRODUCT_ABt:
2531     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2532     break;
2533   default:
2534     break;
2535   }
2536   PetscFunctionReturn(PETSC_SUCCESS);
2537 }
2538