xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 9fe822445bcdd45fb93170ff68fa7403d3f52f09)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14   matrix that represents the operator. For sequential matrices it returns itself.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22   Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscAssertPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/* Return a sequential matrix that aliases the diagonal block of A's local storage.
   The block is created once and cached on A under the key "DiagonalBlock"; it is
   owned by A (via PetscObjectCompose), so the caller must not destroy *a. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B is preallocated on a pointer into A's own storage, so it aliases (does not
       copy) the diagonal block; the array stays valid after the restore below
       because only the access is released, not the storage.
       NOTE(review): the offset m*rstart assumes column-major storage with lda == m
       -- confirm for local blocks with a modified leading dimension. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing takes a reference; dropping our local one leaves B owned by A */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/* Insert or add a logically dense block of values. Locally owned rows go straight
   into the sequential block; off-process rows are queued in the stash and
   communicated during MatAssemblyBegin/End (unless stashing is disabled). */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) { /* locally owned row */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input starts at offset i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* v is column-major (m x n): entry (i,j) is at offset i + j*m */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative columns are silently skipped */
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash the values; mark the matrix unassembled so the
         stashed entries are flushed at the next assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Record the leading dimension of the local array. If the sequential inner matrix
   has not been created yet, it is created here (lazily) so the LDA can be set
   before preallocation; the inner type mirrors A's type (host, CUDA, or HIP). */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block holds all rows owned by this process and ALL global columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
/* Extract the submatrix A(isrow, iscol). The column index set is gathered onto all
   processes (every process holds all selected columns of its selected rows), while
   the rows of the result keep the same partitioning as A: no redistribution is
   performed, so isrow must select only locally owned rows. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  /* NOTE(review): this compares MPI_Comm handles for identity, not congruence --
     communicators that are duplicates of each other would be rejected; confirm
     this is the intended strictness */
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at the start of source column icol[i] (column-major, stride lda);
       gather the selected local rows into the destination, which is filled
       contiguously column by column (its lda is assumed to equal nrows here
       since the destination was just preallocated -- note bv is a running cursor) */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
/* Finish assembly: drain the stash messages started in MatAssemblyBegin_MPIDense,
   inserting the received values (grouped by row so each group is one MatSetValues
   call), then assemble the local sequential block. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* all incoming messages processed */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
/* Zero the given (global) rows, optionally placing diag on the diagonal and
   fixing the right-hand side b so that b = diag*x on the zeroed rows. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block with a zero diagonal; the diagonal is set below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with INSERT_VALUES overwrites the ENTIRE
       diagonal with diag, not only the diagonal entries of the zeroed rows --
       confirm this matches the MatZeroRows contract for untouched rows */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
/* yy = mat*xx: broadcast the needed entries of xx into the local work vector lvec
   via the PetscSF (memtype-aware, so host and device arrays are both handled),
   then apply the local sequential block. The SF is built lazily on first use. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
490 
/* zz = yy + mat[:, c_start:c_end]*xx restricted to a column range: broadcast xx
   into lvec as in MatMult_MPIDense, then dispatch the column-range multiply-add
   to the local block via its composed method. */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* zz = yy + mat*xx: same gather-then-apply structure as MatMult_MPIDense, but
   delegating to the local block's multadd operation. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
528 
/* yy = A[:, c_start:c_end]^H * xx: apply the local Hermitian-transpose multiply
   into lvec, then sum-reduce the partial results across processes into yy via the
   PetscSF (yy is zeroed first since the reduce adds into it). */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
548 
/* Shared kernel for yy = A^T*xx (herm == PETSC_FALSE) or yy = A^H*xx
   (herm == PETSC_TRUE): apply the local (Hermitian-)transpose multiply into lvec,
   then sum-reduce the per-process partial results into yy (zeroed first). */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
569 
570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
571 {
572   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
573   const PetscScalar *ax;
574   PetscScalar       *ay;
575   PetscMemType       axmtype, aymtype;
576 
577   PetscFunctionBegin;
578   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
579   PetscCall(VecCopy(yy, zz));
580   PetscMPIInt rank;
581   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
582   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
583   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
584   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
585   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
586   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
587   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
588   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
/* Shared kernel for zz = yy + A^T*xx (herm == PETSC_FALSE) or zz = yy + A^H*xx
   (herm == PETSC_TRUE): copy yy into zz, apply the local (Hermitian-)transpose
   multiply into lvec, then sum-reduce the partial results into zz. */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
612 
/* yy = A^T*xx: dispatch to the shared kernel in plain-transpose mode */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
619 
/* zz = yy + A^T*xx: dispatch to the shared add-kernel in plain-transpose mode */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
626 
/* yy = A^H*xx: dispatch to the shared kernel in Hermitian-transpose mode */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
633 
/* zz = yy + A^H*xx: dispatch to the shared add-kernel in Hermitian-transpose mode */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
640 
/* Extract the global diagonal into v: each process reads the diagonal entries that
   fall in its local block and zero-fills any remaining local entries of v. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  /* number of diagonal entries held in the local block */
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* offset of the first local diagonal entry in the column-major local array.
     NOTE(review): this uses m rather than lda, so it is correct only when
     lda == m (or rstart == 0) -- confirm for blocks with a modified leading
     dimension */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  /* zero any trailing local entries of v beyond the diagonal length */
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
663 
664 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
665 {
666   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
667 
668   PetscFunctionBegin;
669   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
670   PetscCall(MatStashDestroy_Private(&mat->stash));
671   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
672   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
673   PetscCall(MatDestroy(&mdn->A));
674   PetscCall(VecDestroy(&mdn->lvec));
675   PetscCall(PetscSFDestroy(&mdn->Mvctx));
676   PetscCall(VecDestroy(&mdn->cvec));
677   PetscCall(MatDestroy(&mdn->cmat));
678 
679   PetscCall(PetscFree(mat->data));
680   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
681 
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
694   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
695 #if defined(PETSC_HAVE_ELEMENTAL)
696   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
697 #endif
698 #if defined(PETSC_HAVE_SCALAPACK)
699   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
700 #endif
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
703   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
704 #if defined(PETSC_HAVE_CUDA)
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
722   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
723 #endif
724 #if defined(PETSC_HAVE_HIP)
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
743 #endif
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
757 
758   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 #include <petscdraw.h>
/* Shared viewer kernel for the ASCII, draw, and socket viewers.

   For ASCII viewers in the INFO or INFO_DETAIL formats only summary
   statistics are printed (per-rank statistics for INFO_DETAIL) and the
   routine returns early.  In every other case the whole parallel matrix is
   gathered onto rank 0 by building a temporary MATMPIDENSE whose rows all
   live on rank 0, and that rank's sequential block is viewed through a
   sub-viewer. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics, printed in rank order via the synchronized printer */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* a null draw device means there is nothing to render */
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    /* all rows of the gathered copy are placed on rank 0 */
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* label the rank-0 local block with the original matrix name before viewing */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
843 
844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
845 {
846   PetscBool iascii, isbinary, isdraw, issocket;
847 
848   PetscFunctionBegin;
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
852   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
853 
854   if (iascii || issocket || isdraw) {
855     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
856   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
861 {
862   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
863   Mat            mdn = mat->A;
864   PetscLogDouble isend[5], irecv[5];
865 
866   PetscFunctionBegin;
867   info->block_size = 1.0;
868 
869   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
870 
871   isend[0] = info->nz_used;
872   isend[1] = info->nz_allocated;
873   isend[2] = info->nz_unneeded;
874   isend[3] = info->memory;
875   isend[4] = info->mallocs;
876   if (flag == MAT_LOCAL) {
877     info->nz_used      = isend[0];
878     info->nz_allocated = isend[1];
879     info->nz_unneeded  = isend[2];
880     info->memory       = isend[3];
881     info->mallocs      = isend[4];
882   } else if (flag == MAT_GLOBAL_MAX) {
883     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
884 
885     info->nz_used      = irecv[0];
886     info->nz_allocated = irecv[1];
887     info->nz_unneeded  = irecv[2];
888     info->memory       = irecv[3];
889     info->mallocs      = irecv[4];
890   } else if (flag == MAT_GLOBAL_SUM) {
891     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
892 
893     info->nz_used      = irecv[0];
894     info->nz_allocated = irecv[1];
895     info->nz_unneeded  = irecv[2];
896     info->memory       = irecv[3];
897     info->mallocs      = irecv[4];
898   }
899   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
900   info->fill_ratio_needed = 0;
901   info->factor_mallocs    = 0;
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
/* Set a matrix option on a MATMPIDENSE matrix.
   Options are either forwarded to the local sequential block, recorded in
   the parallel wrapper, logged-and-ignored (they have no meaning for a
   dense matrix), or rejected. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* forwarded verbatim to the local sequential dense block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* recorded here for MatSetValues() on the parallel level, then forwarded */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* meaningless for a dense matrix: note in the log and ignore */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SUBMAT_SINGLEIS:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* stash behavior is handled by the parallel wrapper only */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
949 
/* Scale A by diagonal matrices: A = diag(ll) * A * diag(rr).
   Either vector may be NULL to skip that side.  The local block is stored
   column-major with leading dimension lda, so left scaling strides by lda
   across a row while right scaling walks each column contiguously.  Right
   scaling first gathers the distributed entries of rr into the local work
   vector lvec via the matrix's PetscSF. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    /* left scaling: row i of the local block is multiplied by l[i] */
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* first entry of row i; successive columns are lda apart */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    /* right scaling: gather the needed entries of rr into lvec, then
       multiply column i of the local block by r[i] */
    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* start of column i */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
998 
/* Compute a norm of a MATMPIDENSE matrix.
   Supports NORM_FROBENIUS, NORM_1 (max column sum), and NORM_INFINITY
   (max row sum).  On a single rank the computation is delegated entirely
   to the sequential block; otherwise local contributions are combined with
   an Allreduce.
   NOTE(review): the Frobenius and 1-norm loops walk the local array
   contiguously, which assumes the block's leading dimension equals its
   local row count (no LDA padding) — confirm this invariant holds for all
   callers. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size, iN;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 locally, reduce, then take the square root */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* per-column absolute sums, reduced across ranks, then the maximum */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(PetscMPIIntCast(A->cmap->N, &iN));
      PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so the local max-row-sum reduces with MAX */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCallMPI(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1049 
/* Form the transpose of a MATMPIDENSE matrix.
   reuse semantics:
   - MAT_INITIAL_MATRIX: a new matrix B with swapped layouts is created and
     returned in *matout.
   - MAT_REUSE_MATRIX:   *matout must already have the transposed layout
     (checked via MatTransposeCheckNonzeroState_Private) and is overwritten.
   - MAT_INPLACE_MATRIX: a temporary B is built and merged back into A with
     MatHeaderMerge().
   Each local column j of A becomes (part of) global row j of B; the entries
   are scattered with MatSetValues and finalized by an assembly. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of this rank's rows of A, which
     become the column indices of B */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of the local block (contiguous in memory) is row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's guts with B's while keeping A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1088 
1089 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1090 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1091 
/* Finalize the row/column layouts and, if the user never preallocated,
   perform a default (PETSc-allocated) preallocation. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1100 
1101 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1102 {
1103   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1104 
1105   PetscFunctionBegin;
1106   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1107   PetscFunctionReturn(PETSC_SUCCESS);
1108 }
1109 
1110 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1111 {
1112   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1113 
1114   PetscFunctionBegin;
1115   PetscCall(MatConjugate(a->A));
1116   PetscFunctionReturn(PETSC_SUCCESS);
1117 }
1118 
1119 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1120 {
1121   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1122 
1123   PetscFunctionBegin;
1124   PetscCall(MatRealPart(a->A));
1125   PetscFunctionReturn(PETSC_SUCCESS);
1126 }
1127 
1128 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1129 {
1130   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1131 
1132   PetscFunctionBegin;
1133   PetscCall(MatImaginaryPart(a->A));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1138 {
1139   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1140 
1141   PetscFunctionBegin;
1142   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1143   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1144   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1145   PetscFunctionReturn(PETSC_SUCCESS);
1146 }
1147 
1148 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1149 
/* Compute a per-column reduction (norm, sum, or mean) of a MATMPIDENSE
   matrix into reductions[0..N).
   `type` deliberately mixes values from two enums: NormType (NORM_1,
   NORM_2, NORM_INFINITY, ...) and MatReductionType (REDUCTION_MEAN_*,
   REDUCTION_SUM_*), which is why it is a plain PetscInt.  Means are
   computed by requesting the corresponding SUM from the local kernel and
   dividing by the global row count after the reduction.  2-norms are
   squared locally so they combine additively, then square-rooted after the
   Allreduce; infinity norms reduce with MAX, everything else with SUM. */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;
  PetscMPIInt   in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* square local 2-norms so they sum correctly across ranks */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  PetscCall(PetscMPIIntCast(n, &in));
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* undo the squaring applied before the reduction */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* convert global sums into means */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1184 
1185 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1186 {
1187   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1188 
1189   PetscFunctionBegin;
1190   PetscCall(MatSetRandom(d->A, rctx));
1191 #if defined(PETSC_HAVE_DEVICE)
1192   x->offloadmask = d->A->offloadmask;
1193 #endif
1194   PetscFunctionReturn(PETSC_SUCCESS);
1195 }
1196 
/* A dense matrix stores every entry, so a diagonal entry can never be
   missing; d (the location of the first missing diagonal) is therefore
   never written. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1203 
1204 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1205 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1206 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1207 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1208 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1209 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1210 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1211 
/* Operation table installed on every MATMPIDENSE matrix.  Entries are
   positional (the numbered comments give the slot index in struct _MatOps);
   NULL marks an operation this type does not provide. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL};
1368 
1369 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1370 {
1371   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1372   MatType       mtype = MATSEQDENSE;
1373 
1374   PetscFunctionBegin;
1375   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1376   PetscCall(PetscLayoutSetUp(mat->rmap));
1377   PetscCall(PetscLayoutSetUp(mat->cmap));
1378   if (!a->A) {
1379     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1380     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1381   }
1382 #if defined(PETSC_HAVE_CUDA)
1383   PetscBool iscuda;
1384   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1385   if (iscuda) mtype = MATSEQDENSECUDA;
1386 #endif
1387 #if defined(PETSC_HAVE_HIP)
1388   PetscBool iship;
1389   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1390   if (iship) mtype = MATSEQDENSEHIP;
1391 #endif
1392   PetscCall(MatSetType(a->A, mtype));
1393   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1394 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1395   mat->offloadmask = a->A->offloadmask;
1396 #endif
1397   mat->preallocated = PETSC_TRUE;
1398   mat->assembled    = PETSC_TRUE;
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
1402 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1403 {
1404   Mat B, C;
1405 
1406   PetscFunctionBegin;
1407   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1408   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1409   PetscCall(MatDestroy(&C));
1410   if (reuse == MAT_REUSE_MATRIX) {
1411     C = *newmat;
1412   } else C = NULL;
1413   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1414   PetscCall(MatDestroy(&B));
1415   if (reuse == MAT_INPLACE_MATRIX) {
1416     PetscCall(MatHeaderReplace(A, &C));
1417   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
1421 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1422 {
1423   Mat B, C;
1424 
1425   PetscFunctionBegin;
1426   PetscCall(MatDenseGetLocalMatrix(A, &C));
1427   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1428   if (reuse == MAT_REUSE_MATRIX) {
1429     C = *newmat;
1430   } else C = NULL;
1431   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1432   PetscCall(MatDestroy(&B));
1433   if (reuse == MAT_INPLACE_MATRIX) {
1434     PetscCall(MatHeaderReplace(A, &C));
1435   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1436   PetscFunctionReturn(PETSC_SUCCESS);
1437 }
1438 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert an MPIDENSE matrix to MATELEMENTAL.
   The locally owned rows (for all global columns) are inserted into a new or
   reused Elemental matrix; assembly redistributes them to Elemental's layout. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* entries are accumulated with ADD_VALUES below, so start from zero */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the dense values handed over below are stored column-major */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global indices of the locally owned rows, and of all columns */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* leading dimension differs from the row count: insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1483 
1484 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1485 {
1486   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1487 
1488   PetscFunctionBegin;
1489   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1490   PetscFunctionReturn(PETSC_SUCCESS);
1491 }
1492 
1493 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1494 {
1495   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1496 
1497   PetscFunctionBegin;
1498   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1499   PetscFunctionReturn(PETSC_SUCCESS);
1500 }
1501 
1502 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1503 {
1504   Mat_MPIDense *mat;
1505   PetscInt      m, nloc, N;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetSize(inmat, &m, &N));
1509   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1510   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1511     PetscInt sum;
1512 
1513     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1514     /* Check sum(n) = N */
1515     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1516     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1517 
1518     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1519     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1520   }
1521 
1522   /* numeric phase */
1523   mat = (Mat_MPIDense *)(*outmat)->data;
1524   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1525   PetscFunctionReturn(PETSC_SUCCESS);
1526 }
1527 
1528 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1529 {
1530   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1531   PetscInt      lda;
1532 
1533   PetscFunctionBegin;
1534   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1535   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1536   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1537   a->vecinuse = col + 1;
1538   PetscCall(MatDenseGetLDA(a->A, &lda));
1539   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1540   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1541   *v = a->cvec;
1542   PetscFunctionReturn(PETSC_SUCCESS);
1543 }
1544 
1545 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1546 {
1547   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1548 
1549   PetscFunctionBegin;
1550   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1551   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1552   VecCheckAssembled(a->cvec);
1553   a->vecinuse = 0;
1554   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1555   PetscCall(VecResetArray(a->cvec));
1556   if (v) *v = NULL;
1557   PetscFunctionReturn(PETSC_SUCCESS);
1558 }
1559 
1560 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1561 {
1562   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1563   PetscInt      lda;
1564 
1565   PetscFunctionBegin;
1566   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1567   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1568   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1569   a->vecinuse = col + 1;
1570   PetscCall(MatDenseGetLDA(a->A, &lda));
1571   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1572   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1573   PetscCall(VecLockReadPush(a->cvec));
1574   *v = a->cvec;
1575   PetscFunctionReturn(PETSC_SUCCESS);
1576 }
1577 
1578 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1579 {
1580   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1581 
1582   PetscFunctionBegin;
1583   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1584   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1585   VecCheckAssembled(a->cvec);
1586   a->vecinuse = 0;
1587   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1588   PetscCall(VecLockReadPop(a->cvec));
1589   PetscCall(VecResetArray(a->cvec));
1590   if (v) *v = NULL;
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
1593 
1594 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1595 {
1596   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1597   PetscInt      lda;
1598 
1599   PetscFunctionBegin;
1600   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1601   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1602   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1603   a->vecinuse = col + 1;
1604   PetscCall(MatDenseGetLDA(a->A, &lda));
1605   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1606   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1607   *v = a->cvec;
1608   PetscFunctionReturn(PETSC_SUCCESS);
1609 }
1610 
1611 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1612 {
1613   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1614 
1615   PetscFunctionBegin;
1616   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1617   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1618   VecCheckAssembled(a->cvec);
1619   a->vecinuse = 0;
1620   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1621   PetscCall(VecResetArray(a->cvec));
1622   if (v) *v = NULL;
1623   PetscFunctionReturn(PETSC_SUCCESS);
1624 }
1625 
/* Expose the contiguous submatrix [rbegin,rend) x [cbegin,cend) of A as an
   MPIDENSE matrix that aliases A's storage (no copy). The cached a->cmat is
   reused across calls; its layouts are rebuilt only when the requested sizes
   changed. Must be matched by MatDenseRestoreSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range [rbegin,rend) against this process's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached submatrix shell */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached shell exists: rebuild its layouts only if the sizes changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* same global size but not A's own layout: all processes must also agree on the local sizes */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* alias the local rows/columns of A's inner sequential matrix */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* 1-based so that 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1683 
1684 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1685 {
1686   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1687   Mat_MPIDense *c;
1688 
1689   PetscFunctionBegin;
1690   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1691   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1692   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1693   a->matinuse = 0;
1694   c           = (Mat_MPIDense *)a->cmat->data;
1695   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1696   if (v) *v = NULL;
1697 #if defined(PETSC_HAVE_DEVICE)
1698   A->offloadmask = a->A->offloadmask;
1699 #endif
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
1703 /*MC
1704    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1705 
1706    Options Database Key:
1707 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1708 
1709   Level: beginner
1710 
1711 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1712 M*/
/* Constructor for MATMPIDENSE: installs the function table, creates the value
   stash used for off-process MatSetValues(), and registers all composed
   methods (dense array access, column vectors, submatrices, conversions, and
   matrix products). */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* struct assignment installs the whole function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array and leading-dimension access */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-matrix products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1784 
1785 /*MC
1786    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1787 
1788    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1789    and `MATMPIDENSE` otherwise.
1790 
1791    Options Database Key:
1792 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1793 
1794   Level: beginner
1795 
1796 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1797 M*/
1798 
1799 /*@
1800   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1801 
1802   Collective
1803 
1804   Input Parameters:
1805 + B    - the matrix
1806 - data - optional location of matrix data.  Set to `NULL` for PETSc
1807          to control all matrix memory allocation.
1808 
1809   Level: intermediate
1810 
1811   Notes:
1812   The dense format is fully compatible with standard Fortran
1813   storage by columns.
1814 
1815   The data input variable is intended primarily for Fortran programmers
1816   who wish to allocate their own matrix memory space.  Most users should
1817   set `data` to `NULL`.
1818 
1819 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1820 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the type-specific implementation; a no-op for non-MPIDENSE types */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1828 
1829 /*@
1830   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1831   array provided by the user. This is useful to avoid copying an array
1832   into a matrix
1833 
1834   Not Collective
1835 
1836   Input Parameters:
1837 + mat   - the matrix
1838 - array - the array in column major order
1839 
1840   Level: developer
1841 
1842   Note:
1843   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1844 
1845   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1846   freed when the matrix is destroyed.
1847 
1848 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1849           `MatDenseReplaceArray()`
1850 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-provided array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1862 
1863 /*@
1864   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1865 
1866   Not Collective
1867 
1868   Input Parameter:
1869 . mat - the matrix
1870 
1871   Level: developer
1872 
1873   Note:
1874   You can only call this after a call to `MatDensePlaceArray()`
1875 
1876 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1877 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod (unlike PetscTryMethod) errors if the type does not provide this */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1886 
1887 /*@
1888   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1889   array provided by the user. This is useful to avoid copying an array
1890   into a matrix
1891 
1892   Not Collective
1893 
1894   Input Parameters:
1895 + mat   - the matrix
1896 - array - the array in column major order
1897 
1898   Level: developer
1899 
1900   Note:
1901   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1902 
1903   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1904   freed by the user. It will be freed when the matrix is destroyed.
1905 
1906 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1907 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1919 
1920 /*@
1921   MatCreateDense - Creates a matrix in `MATDENSE` format.
1922 
1923   Collective
1924 
1925   Input Parameters:
1926 + comm - MPI communicator
1927 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1928 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1929 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1930 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1931 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1932    to control all matrix memory allocation.
1933 
1934   Output Parameter:
1935 . A - the matrix
1936 
1937   Level: intermediate
1938 
1939   Notes:
1940   The dense format is fully compatible with standard Fortran
1941   storage by columns.
1942 
1943   Although local portions of the matrix are stored in column-major
1944   order, the matrix is partitioned across MPI ranks by row.
1945 
1946   The data input variable is intended primarily for Fortran programmers
1947   who wish to allocate their own matrix memory space.  Most users should
1948   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1949 
1950   The user MUST specify either the local or global matrix dimensions
1951   (possibly both).
1952 
1953 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1954 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation calls use PetscTryMethod, so only the one matching the
     actual type (SEQDENSE on one process, MPIDENSE otherwise) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1965 
1966 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1967 {
1968   Mat           mat;
1969   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1970 
1971   PetscFunctionBegin;
1972   *newmat = NULL;
1973   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1974   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1975   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1976   a = (Mat_MPIDense *)mat->data;
1977 
1978   mat->factortype   = A->factortype;
1979   mat->assembled    = PETSC_TRUE;
1980   mat->preallocated = PETSC_TRUE;
1981 
1982   mat->insertmode = NOT_SET_VALUES;
1983   a->donotstash   = oldmat->donotstash;
1984 
1985   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1986   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1987 
1988   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1989 
1990   *newmat = mat;
1991   PetscFunctionReturn(PETSC_SUCCESS);
1992 }
1993 
/* Load an MPIDENSE matrix from a viewer. Supports binary viewers and, when
   PETSc was built with HDF5, HDF5 viewers; anything else is an error. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2019 
2020 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2021 {
2022   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2023   Mat           a, b;
2024 
2025   PetscFunctionBegin;
2026   a = matA->A;
2027   b = matB->A;
2028   PetscCall(MatEqual(a, b, flag));
2029   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2034 {
2035   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2036 
2037   PetscFunctionBegin;
2038   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2039   PetscCall(MatDestroy(&atb->atb));
2040   PetscCall(PetscFree(atb));
2041   PetscFunctionReturn(PETSC_SUCCESS);
2042 }
2043 
2044 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2045 {
2046   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2047 
2048   PetscFunctionBegin;
2049   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2050   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2051   PetscCall(PetscFree(abt));
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Every rank forms the full-sized local product aseq^T * bseq, packs it by
   destination rank into sendbuf, and one MPI_Reduce_scatter both sums the
   per-rank contributions and scatters to each rank its own row block of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* sendbuf and recvcounts were sized in the symbolic phase */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq; reuse the cached matrix after the first call */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its row block of
     atb in column-major order, so the reduce-scatter delivers contiguous data */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C
     NOTE(review): the receive writes carray contiguously, which assumes c->A has
     lda equal to its local row count -- confirm against how c->A is created */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  /* all entries of C were produced locally, so assembly needs no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2100 
2101 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2102 {
2103   MPI_Comm               comm;
2104   PetscMPIInt            size;
2105   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2106   Mat_TransMatMultDense *atb;
2107   PetscBool              cisdense = PETSC_FALSE;
2108   const PetscInt        *ranges;
2109 
2110   PetscFunctionBegin;
2111   MatCheckProduct(C, 4);
2112   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2113   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2114   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2115     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2116   }
2117 
2118   /* create matrix product C */
2119   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2120 #if defined(PETSC_HAVE_CUDA)
2121   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2122 #endif
2123 #if defined(PETSC_HAVE_HIP)
2124   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2125 #endif
2126   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2127   PetscCall(MatSetUp(C));
2128 
2129   /* create data structure for reuse C */
2130   PetscCallMPI(MPI_Comm_size(comm, &size));
2131   PetscCall(PetscNew(&atb));
2132   cM = C->rmap->N;
2133   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2134   PetscCall(MatGetOwnershipRanges(C, &ranges));
2135   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2136   C->product->data    = atb;
2137   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2142 {
2143   MPI_Comm               comm;
2144   PetscMPIInt            i, size;
2145   PetscInt               maxRows, bufsiz;
2146   PetscMPIInt            tag;
2147   PetscInt               alg;
2148   Mat_MatTransMultDense *abt;
2149   Mat_Product           *product = C->product;
2150   PetscBool              flg;
2151 
2152   PetscFunctionBegin;
2153   MatCheckProduct(C, 4);
2154   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2155   /* check local size of A and B */
2156   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2157 
2158   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2159   alg = flg ? 0 : 1;
2160 
2161   /* setup matrix product C */
2162   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2163   PetscCall(MatSetType(C, MATMPIDENSE));
2164   PetscCall(MatSetUp(C));
2165   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2166 
2167   /* create data structure for reuse C */
2168   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2169   PetscCallMPI(MPI_Comm_size(comm, &size));
2170   PetscCall(PetscNew(&abt));
2171   abt->tag = tag;
2172   abt->alg = alg;
2173   switch (alg) {
2174   case 1: /* alg: "cyclic" */
2175     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2176     bufsiz = A->cmap->N * maxRows;
2177     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2178     break;
2179   default: /* alg: "allgatherv" */
2180     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2181     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2182     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2183     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2184     break;
2185   }
2186 
2187   C->product->data                = abt;
2188   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2189   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2190   PetscFunctionReturn(PETSC_SUCCESS);
2191 }
2192 
/* Numeric phase of C = A * B^T using a ring (cyclic) exchange: each rank
   multiplies its local rows of A against the row block of B currently held in
   sendbuf while the next block travels around the communicator via
   nonblocking send/receive, overlapping communication with the local GEMMs. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local arrays, converted for BLAS */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send it in place, no packing copy needed */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact the strided columns of local B into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the data initially in sendbuf originates on this rank */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so the in-flight sendbuf is never overwritten */
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* every entry of C was computed locally; assembly requires no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2276 
2277 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2278 {
2279   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2280   Mat_MatTransMultDense *abt;
2281   MPI_Comm               comm;
2282   PetscMPIInt            size, ibn;
2283   PetscScalar           *cv, *sendbuf, *recvbuf;
2284   const PetscScalar     *av, *bv;
2285   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2286   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2287   PetscBLASInt           cm, cn, ck, alda, clda;
2288 
2289   PetscFunctionBegin;
2290   MatCheckProduct(C, 3);
2291   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2292   abt = (Mat_MatTransMultDense *)C->product->data;
2293   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2294   PetscCallMPI(MPI_Comm_size(comm, &size));
2295   PetscCall(MatDenseGetArrayRead(a->A, &av));
2296   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2297   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2298   PetscCall(MatDenseGetLDA(a->A, &i));
2299   PetscCall(PetscBLASIntCast(i, &alda));
2300   PetscCall(MatDenseGetLDA(b->A, &blda));
2301   PetscCall(MatDenseGetLDA(c->A, &i));
2302   PetscCall(PetscBLASIntCast(i, &clda));
2303   /* copy transpose of B into buf[0] */
2304   bn      = B->rmap->n;
2305   sendbuf = abt->buf[0];
2306   recvbuf = abt->buf[1];
2307   for (k = 0, j = 0; j < bn; j++) {
2308     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2309   }
2310   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2311   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2312   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2313   PetscCall(PetscBLASIntCast(cK, &ck));
2314   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2315   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2316   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2317   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2318   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2319   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2320   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2321   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2322   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2323   PetscFunctionReturn(PETSC_SUCCESS);
2324 }
2325 
2326 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2327 {
2328   Mat_MatTransMultDense *abt;
2329 
2330   PetscFunctionBegin;
2331   MatCheckProduct(C, 3);
2332   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2333   abt = (Mat_MatTransMultDense *)C->product->data;
2334   switch (abt->alg) {
2335   case 1:
2336     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2337     break;
2338   default:
2339     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2340     break;
2341   }
2342   PetscFunctionReturn(PETSC_SUCCESS);
2343 }
2344 
2345 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2346 {
2347   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2348 
2349   PetscFunctionBegin;
2350   PetscCall(MatDestroy(&ab->Ce));
2351   PetscCall(MatDestroy(&ab->Ae));
2352   PetscCall(MatDestroy(&ab->Be));
2353   PetscCall(PetscFree(ab));
2354   PetscFunctionReturn(PETSC_SUCCESS);
2355 }
2356 
/* Numeric phase of C = A * B. Two paths, chosen in the symbolic phase:
   - Elemental (ab->Ae and ab->Ce set): convert A and B to MATELEMENTAL,
     multiply there, and convert the result back into C;
   - "petsc": broadcast the rows of B that A's columns need onto every rank
     through A's scatter context, then call the sequential dense multiply. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* reuse the Elemental matrices created in the symbolic phase */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* column by column, gather B's entries into the local work matrix Be */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* local rows of C = local rows of A times the gathered Be */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2394 
2395 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2396 {
2397   Mat_Product      *product = C->product;
2398   PetscInt          alg;
2399   Mat_MatMultDense *ab;
2400   PetscBool         flg;
2401 
2402   PetscFunctionBegin;
2403   MatCheckProduct(C, 4);
2404   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2405   /* check local size of A and B */
2406   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2407              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2408 
2409   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2410   alg = flg ? 0 : 1;
2411 
2412   /* setup C */
2413   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2414   PetscCall(MatSetType(C, MATMPIDENSE));
2415   PetscCall(MatSetUp(C));
2416 
2417   /* create data structure for reuse Cdense */
2418   PetscCall(PetscNew(&ab));
2419 
2420   switch (alg) {
2421   case 1: /* alg: "elemental" */
2422 #if PetscDefined(HAVE_ELEMENTAL)
2423     /* create elemental matrices Ae and Be */
2424     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2425     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2426     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2427     PetscCall(MatSetUp(ab->Ae));
2428     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2429 
2430     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2431     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2432     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2433     PetscCall(MatSetUp(ab->Be));
2434     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2435 
2436     /* compute symbolic Ce = Ae*Be */
2437     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2438     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2439 #else
2440     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2441 #endif
2442     break;
2443   default: /* alg: "petsc" */
2444     ab->Ae = NULL;
2445     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2446     ab->Ce = NULL;
2447     break;
2448   }
2449 
2450   C->product->data       = ab;
2451   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2452   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2453   PetscFunctionReturn(PETSC_SUCCESS);
2454 }
2455 
/* Configure a MATPRODUCT_AB product C = A*B: pick between the "petsc" and
   (when Elemental is available) "elemental" algorithms — honoring the
   -mat_product_algorithm option — and install the symbolic callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; /* "elemental" only offered when compiled in */
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is petsc */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2479 
2480 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2481 {
2482   Mat_Product *product = C->product;
2483   Mat          A = product->A, B = product->B;
2484 
2485   PetscFunctionBegin;
2486   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2487              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2488   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2489   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2490   PetscFunctionReturn(PETSC_SUCCESS);
2491 }
2492 
/* Configure a MATPRODUCT_ABt product C = A*B^T: pick between the "allgatherv"
   and "cyclic" algorithms (option name depends on whether the old user-level
   API or the MatProduct API was used) and install the symbolic callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option: the legacy option name is kept when the product was
     created through the MatMatTransposeMult() user API */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2522 
2523 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2524 {
2525   Mat_Product *product = C->product;
2526 
2527   PetscFunctionBegin;
2528   switch (product->type) {
2529   case MATPRODUCT_AB:
2530     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2531     break;
2532   case MATPRODUCT_AtB:
2533     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2534     break;
2535   case MATPRODUCT_ABt:
2536     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2537     break;
2538   default:
2539     break;
2540   }
2541   PetscFunctionReturn(PETSC_SUCCESS);
2542 }
2543