xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision e91c04dfc8a52dee1965211bb1cc8e5bf775178f)
1 /*
2    Basic functions for parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
14   matrix that stores the locally owned rows of the operator. For a sequential matrix, the matrix itself is returned.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22   Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscAssertPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
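/*
   A minimal usage sketch (illustrative, not part of this file): obtain the inner sequential
   matrix of an MPI dense matrix and operate directly on the locally owned rows. Here A is
   assumed to be an already assembled MATMPIDENSE; Aloc is a borrowed reference and must not be
   destroyed by the caller.

     Mat Aloc;

     PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
     PetscCall(MatScale(Aloc, 2.0)); // scales this process's rows, and hence the whole parallel matrix
*/
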
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
97 {
98   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
99   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
100   PetscScalar  *array;
101   MPI_Comm      comm;
102   PetscBool     flg;
103   Mat           B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatHasCongruentLayouts(A, &flg));
107   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
108   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
109   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
110 #if PetscDefined(HAVE_CUDA)
111     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
112     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
113 #elif PetscDefined(HAVE_HIP)
114     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
115     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
116 #endif
117     PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
118     PetscCall(MatCreate(comm, &B));
119     PetscCall(MatSetSizes(B, m, m, m, m));
120     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
121     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
122     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
123     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
124     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
125     *a = B;
126     PetscCall(MatDestroy(&B));
127   } else *a = B;
128   PetscFunctionReturn(PETSC_SUCCESS);
129 }
130 
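/*
   Hedged usage sketch for the routine above: MatGetDiagonalBlock() returns the square
   on-diagonal block owned by this process; the block is cached on the matrix as "DiagonalBlock"
   so repeated calls are cheap, and the returned matrix is a borrowed reference. A is an
   illustrative, already assembled MATMPIDENSE with congruent row and column layouts.

     Mat diag;

     PetscCall(MatGetDiagonalBlock(A, &diag));
     PetscCall(MatView(diag, PETSC_VIEWER_STDOUT_SELF)); // each process views only its own block
*/
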
131 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
132 {
133   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
134   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
135   PetscBool     roworiented = A->roworiented;
136 
137   PetscFunctionBegin;
138   for (i = 0; i < m; i++) {
139     if (idxm[i] < 0) continue;
140     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
141     if (idxm[i] >= rstart && idxm[i] < rend) {
142       row = idxm[i] - rstart;
143       if (roworiented) {
144         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
145       } else {
146         for (j = 0; j < n; j++) {
147           if (idxn[j] < 0) continue;
148           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
149           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
150         }
151       }
152     } else if (!A->donotstash) {
153       mat->assembled = PETSC_FALSE;
154       if (roworiented) {
155         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
156       } else {
157         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
158       }
159     }
160   }
161   PetscFunctionReturn(PETSC_SUCCESS);
162 }
163 
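/*
   Sketch of the path exercised above: values destined for rows owned by another process are
   stashed (MatStashValuesRow_Private / MatStashValuesCol_Private) and only communicated during
   assembly, so MatSetValues() is always paired with assembly. The indices and value below are
   illustrative only.

     PetscInt    row = 0, col = 0; // row may be owned by a different process
     PetscScalar v   = 1.0;

     PetscCall(MatSetValues(A, 1, &row, 1, &col, &v, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); // MatAssemblyBegin_MPIDense scatters the stash
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));   // MatAssemblyEnd_MPIDense inserts the received values
*/
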
164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195 {
196   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
197   MatType       mtype = MATSEQDENSE;
198 
199   PetscFunctionBegin;
200   if (!a->A) {
201     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202     PetscCall(PetscLayoutSetUp(A->rmap));
203     PetscCall(PetscLayoutSetUp(A->cmap));
204     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206 #if PetscDefined(HAVE_CUDA)
207     PetscBool iscuda;
208     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209     if (iscuda) mtype = MATSEQDENSECUDA;
210 #elif PetscDefined(HAVE_HIP)
211     PetscBool iship;
212     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213     if (iship) mtype = MATSEQDENSEHIP;
214 #endif
215     PetscCall(MatSetType(a->A, mtype));
216   }
217   PetscCall(MatDenseSetLDA(a->A, lda));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285 {
286   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
287   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288   const PetscInt    *irow, *icol;
289   const PetscScalar *v;
290   PetscScalar       *bv;
291   Mat                newmat;
292   IS                 iscol_local;
293   MPI_Comm           comm_is, comm_mat;
294 
295   PetscFunctionBegin;
296   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
299 
300   PetscCall(ISAllGather(iscol, &iscol_local));
301   PetscCall(ISGetIndices(isrow, &irow));
302   PetscCall(ISGetIndices(iscol_local, &icol));
303   PetscCall(ISGetLocalSize(isrow, &nrows));
304   PetscCall(ISGetLocalSize(iscol, &ncols));
305   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
306 
307   /* No parallel redistribution is currently supported: the submatrix must use the same row
308      partitioning as the original matrix, so isrow may only contain locally owned rows. Each index
309      set should really be checked here to confirm this (see the usage sketch after this function). */
310 
311   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
313 
314   /* Check submatrix call */
315   if (scall == MAT_REUSE_MATRIX) {
316     /* TODO: the row and column sizes of the reused matrix should really be checked here and an
317        error (e.g. PETSC_ERR_ARG_SIZ) raised if they do not match the index sets */
318     newmat = *B;
319   } else {
320     /* Create and fill new matrix */
321     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325   }
326 
327   /* Now extract the data pointers and do the copy, column at a time */
328   newmatd = (Mat_MPIDense *)newmat->data;
329   PetscCall(MatDenseGetArray(newmatd->A, &bv));
330   PetscCall(MatDenseGetArrayRead(mat->A, &v));
331   PetscCall(MatDenseGetLDA(mat->A, &lda));
332   for (i = 0; i < Ncols; i++) {
333     const PetscScalar *av = v + lda * icol[i];
334     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335   }
336   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
338 
339   /* Assemble the matrices so that the correct flags are set */
340   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
342 
343   /* Free work space */
344   PetscCall(ISRestoreIndices(isrow, &irow));
345   PetscCall(ISRestoreIndices(iscol_local, &icol));
346   PetscCall(ISDestroy(&iscol_local));
347   *B = newmat;
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
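/*
   Usage sketch for the restriction noted inside the routine above: the row index set may only
   contain locally owned rows, and the (parallel) column index set lists, on each process, the
   columns that process will own in the result. The simplest conforming choice, shown with
   illustrative names, keeps the original row and column ownership:

     IS       isrow, iscol;
     Mat      sub;
     PetscInt rstart, rend, cstart, cend;

     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), cend - cstart, cstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscCall(MatDestroy(&sub));
*/
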
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
395   PetscInt      i, *row, *col, flg, j, rstart, ncols;
396   PetscMPIInt   n;
397   PetscScalar  *val;
398 
399   PetscFunctionBegin;
400   if (!mdn->donotstash && !mat->nooffprocentries) {
401     /*  wait on receives */
402     while (1) {
403       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
404       if (!flg) break;
405 
406       for (i = 0; i < n;) {
407         /* Now identify the consecutive vals belonging to the same row */
408         for (j = i, rstart = row[j]; j < n; j++) {
409           if (row[j] != rstart) break;
410         }
411         if (j < n) ncols = j - i;
412         else ncols = n - i;
413         /* Now assemble all these values with a single function call */
414         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
415         i = j;
416       }
417     }
418     PetscCall(MatStashScatterEnd_Private(&mat->stash));
419   }
420 
421   PetscCall(MatAssemblyBegin(mdn->A, mode));
422   PetscCall(MatAssemblyEnd(mdn->A, mode));
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436 {
437   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438   PetscInt      i, len, *lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443   /* fix right-hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     PetscCall(VecGetArrayRead(x, &xx));
449     PetscCall(VecGetArrayWrite(b, &bb));
450     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451     PetscCall(VecRestoreArrayRead(x, &xx));
452     PetscCall(VecRestoreArrayWrite(b, &bb));
453   }
454   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455   if (diag != 0.0) {
456     Vec d;
457 
458     PetscCall(MatCreateVecs(A, NULL, &d));
459     PetscCall(VecSet(d, diag));
460     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461     PetscCall(VecDestroy(&d));
462   }
463   PetscCall(PetscFree(lrows));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
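/*
   Hedged usage sketch for the routine above (the usual Dirichlet-boundary idiom): each process
   passes the global indices of rows to be zeroed, diag is placed on the zeroed diagonal entries,
   and b is fixed up so that the prescribed values in x are reproduced by the modified system.
   nrows, rows, x and b are illustrative names.

     PetscCall(MatZeroRows(A, nrows, rows, 1.0, x, b));
*/
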
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475   const PetscScalar *ax;
476   PetscScalar       *ay;
477   PetscMemType       axmtype, aymtype;
478 
479   PetscFunctionBegin;
480   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
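/*
   Data-flow shared by the Mult* kernels above and below: the inner matrix stores all N columns
   of the locally owned rows, so y = A x reduces to (i) broadcasting the distributed x into the
   sequential work vector lvec through the PetscSF Mvctx and (ii) a purely local dense product

       y_local = A_local * lvec,   with A_local of size (local rows) x N.

   The (Hermitian-)transpose kernels reverse the flow: a local product into lvec followed by a
   PetscSF reduction with MPIU_SUM into the distributed result.
*/
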
491 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494   const PetscScalar *ax;
495   PetscScalar       *ay;
496   PetscMemType       axmtype, aymtype;
497 
498   PetscFunctionBegin;
499   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506   PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
511 {
512   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
513   const PetscScalar *ax;
514   PetscScalar       *ay;
515   PetscMemType       axmtype, aymtype;
516 
517   PetscFunctionBegin;
518   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
519   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
520   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
521   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
522   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
523   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
524   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
525   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
530 {
531   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
532   const PetscScalar *ax;
533   PetscScalar       *ay;
534   PetscMemType       axmtype, aymtype;
535 
536   PetscFunctionBegin;
537   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
538   PetscCall(VecSet(yy, 0.0));
539   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
540   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
541   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
542   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
543   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
544   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
545   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
550 {
551   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552   const PetscScalar *ax;
553   PetscScalar       *ay;
554   PetscMemType       axmtype, aymtype;
555 
556   PetscFunctionBegin;
557   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
558   PetscCall(VecSet(yy, 0.0));
559   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
560   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
561   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
562   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
563   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
564   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
565   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
566   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
567   PetscFunctionReturn(PETSC_SUCCESS);
568 }
569 
570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
571 {
572   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
573   const PetscScalar *ax;
574   PetscScalar       *ay;
575   PetscMemType       axmtype, aymtype;
576 
577   PetscFunctionBegin;
578   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
579   PetscCall(VecCopy(yy, zz));
582   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
583   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
584   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
585   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
586   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
587   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
588   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
592 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
593 {
594   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
595   const PetscScalar *ax;
596   PetscScalar       *ay;
597   PetscMemType       axmtype, aymtype;
598 
599   PetscFunctionBegin;
600   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
601   PetscCall(VecCopy(yy, zz));
602   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
603   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
604   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
605   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
606   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
607   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
608   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
609   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
610   PetscFunctionReturn(PETSC_SUCCESS);
611 }
612 
613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
614 {
615   PetscFunctionBegin;
616   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
617   PetscFunctionReturn(PETSC_SUCCESS);
618 }
619 
620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
621 {
622   PetscFunctionBegin;
623   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
628 {
629   PetscFunctionBegin;
630   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
631   PetscFunctionReturn(PETSC_SUCCESS);
632 }
633 
634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
635 {
636   PetscFunctionBegin;
637   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
638   PetscFunctionReturn(PETSC_SUCCESS);
639 }
640 
641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
642 {
643   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
644   PetscInt           lda, len, i, nl, ng, radd;
645   PetscScalar       *x;
646   const PetscScalar *av;
647 
648   PetscFunctionBegin;
649   PetscCall(VecGetArray(v, &x));
650   PetscCall(VecGetSize(v, &ng));
651   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
652   PetscCall(VecGetLocalSize(v, &nl));
653   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
654   PetscCall(MatDenseGetArrayRead(a->A, &av));
655   PetscCall(MatDenseGetLDA(a->A, &lda));
656   radd = A->rmap->rstart * lda; /* offset of the first locally owned diagonal entry; must use the leading dimension, not the local row count */
657   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
658   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
659   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
660   PetscCall(VecRestoreArray(v, &x));
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
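/*
   Index arithmetic used above: the inner matrix stores the local rows against all N columns in
   column-major order with leading dimension lda, so the global diagonal entry (rstart+i, rstart+i)
   sits at local position i + (rstart + i) * lda = rstart * lda + i * (lda + 1).
*/
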
664 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
665 {
666   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
667 
668   PetscFunctionBegin;
669   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
670   PetscCall(MatStashDestroy_Private(&mat->stash));
671   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
672   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
673   PetscCall(MatDestroy(&mdn->A));
674   PetscCall(VecDestroy(&mdn->lvec));
675   PetscCall(PetscSFDestroy(&mdn->Mvctx));
676   PetscCall(VecDestroy(&mdn->cvec));
677   PetscCall(MatDestroy(&mdn->cmat));
678 
679   PetscCall(PetscFree(mat->data));
680   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
681 
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
694   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
695 #if defined(PETSC_HAVE_ELEMENTAL)
696   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
697 #endif
698 #if defined(PETSC_HAVE_SCALAPACK)
699   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
700 #endif
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
703   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
704 #if defined(PETSC_HAVE_CUDA)
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
722   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
723 #endif
724 #if defined(PETSC_HAVE_HIP)
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
743 #endif
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
757 
758   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 #include <petscdraw.h>
763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
764 {
765   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
766   PetscMPIInt       rank;
767   PetscViewerType   vtype;
768   PetscBool         iascii, isdraw;
769   PetscViewer       sviewer;
770   PetscViewerFormat format;
771 
772   PetscFunctionBegin;
773   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
774   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
775   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
776   if (iascii) {
777     PetscCall(PetscViewerGetType(viewer, &vtype));
778     PetscCall(PetscViewerGetFormat(viewer, &format));
779     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
780       MatInfo info;
781       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
782       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
783       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
784                                                    (PetscInt)info.memory));
785       PetscCall(PetscViewerFlush(viewer));
786       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
787       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
788       PetscFunctionReturn(PETSC_SUCCESS);
789     } else if (format == PETSC_VIEWER_ASCII_INFO) {
790       PetscFunctionReturn(PETSC_SUCCESS);
791     }
792   } else if (isdraw) {
793     PetscDraw draw;
794     PetscBool isnull;
795 
796     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
797     PetscCall(PetscDrawIsNull(draw, &isnull));
798     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
799   }
800 
801   {
802     /* assemble the entire matrix onto first processor. */
803     Mat          A;
804     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
805     PetscInt    *cols;
806     PetscScalar *vals;
807 
808     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
809     if (rank == 0) {
810       PetscCall(MatSetSizes(A, M, N, M, N));
811     } else {
812       PetscCall(MatSetSizes(A, 0, 0, M, N));
813     }
814     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)mat)->type_name here is probably acceptable. */
815     PetscCall(MatSetType(A, MATMPIDENSE));
816     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
817 
818     /* Copy the matrix row by row; this isn't the most efficient approach,
819        but it keeps the viewing code simple */
820     A->insertmode = INSERT_VALUES;
821 
822     row = mat->rmap->rstart;
823     m   = mdn->A->rmap->n;
824     for (i = 0; i < m; i++) {
825       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
826       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
827       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
828       row++;
829     }
830 
831     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
832     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
833     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
834     if (rank == 0) {
835       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
836       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
837     }
838     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
839     PetscCall(MatDestroy(&A));
840   }
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
845 {
846   PetscBool iascii, isbinary, isdraw, issocket;
847 
848   PetscFunctionBegin;
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
852   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
853 
854   if (iascii || issocket || isdraw) {
855     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
856   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
861 {
862   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
863   Mat            mdn = mat->A;
864   PetscLogDouble isend[5], irecv[5];
865 
866   PetscFunctionBegin;
867   info->block_size = 1.0;
868 
869   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
870 
871   isend[0] = info->nz_used;
872   isend[1] = info->nz_allocated;
873   isend[2] = info->nz_unneeded;
874   isend[3] = info->memory;
875   isend[4] = info->mallocs;
876   if (flag == MAT_LOCAL) {
877     info->nz_used      = isend[0];
878     info->nz_allocated = isend[1];
879     info->nz_unneeded  = isend[2];
880     info->memory       = isend[3];
881     info->mallocs      = isend[4];
882   } else if (flag == MAT_GLOBAL_MAX) {
883     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
884 
885     info->nz_used      = irecv[0];
886     info->nz_allocated = irecv[1];
887     info->nz_unneeded  = irecv[2];
888     info->memory       = irecv[3];
889     info->mallocs      = irecv[4];
890   } else if (flag == MAT_GLOBAL_SUM) {
891     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
892 
893     info->nz_used      = irecv[0];
894     info->nz_allocated = irecv[1];
895     info->nz_unneeded  = irecv[2];
896     info->memory       = irecv[3];
897     info->mallocs      = irecv[4];
898   }
899   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
900   info->fill_ratio_needed = 0;
901   info->factor_mallocs    = 0;
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
905 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
906 {
907   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
908 
909   PetscFunctionBegin;
910   switch (op) {
911   case MAT_NEW_NONZERO_LOCATIONS:
912   case MAT_NEW_NONZERO_LOCATION_ERR:
913   case MAT_NEW_NONZERO_ALLOCATION_ERR:
914     MatCheckPreallocated(A, 1);
915     PetscCall(MatSetOption(a->A, op, flg));
916     break;
917   case MAT_ROW_ORIENTED:
918     MatCheckPreallocated(A, 1);
919     a->roworiented = flg;
920     PetscCall(MatSetOption(a->A, op, flg));
921     break;
922   case MAT_FORCE_DIAGONAL_ENTRIES:
923   case MAT_KEEP_NONZERO_PATTERN:
924   case MAT_USE_HASH_TABLE:
925   case MAT_SORTED_FULL:
926     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
927     break;
928   case MAT_IGNORE_OFF_PROC_ENTRIES:
929     a->donotstash = flg;
930     break;
931   case MAT_SYMMETRIC:
932   case MAT_STRUCTURALLY_SYMMETRIC:
933   case MAT_HERMITIAN:
934   case MAT_SYMMETRY_ETERNAL:
935   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
936   case MAT_SPD:
937   case MAT_IGNORE_LOWER_TRIANGULAR:
938   case MAT_IGNORE_ZERO_ENTRIES:
939   case MAT_SPD_ETERNAL:
940     /* if the inner (diagonal block) matrix is square it inherits some of the properties above */
941     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
942     break;
943   default:
944     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
945   }
946   PetscFunctionReturn(PETSC_SUCCESS);
947 }
948 
949 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
950 {
951   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
952   const PetscScalar *l;
953   PetscScalar        x, *v, *vv, *r;
954   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
955 
956   PetscFunctionBegin;
957   PetscCall(MatDenseGetArray(mdn->A, &vv));
958   PetscCall(MatDenseGetLDA(mdn->A, &lda));
959   PetscCall(MatGetLocalSize(A, &s2, &s3));
960   if (ll) {
961     PetscCall(VecGetLocalSize(ll, &s2a));
962     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
963     PetscCall(VecGetArrayRead(ll, &l));
964     for (i = 0; i < m; i++) {
965       x = l[i];
966       v = vv + i;
967       for (j = 0; j < n; j++) {
968         (*v) *= x;
969         v += lda;
970       }
971     }
972     PetscCall(VecRestoreArrayRead(ll, &l));
973     PetscCall(PetscLogFlops(1.0 * n * m));
974   }
975   if (rr) {
976     const PetscScalar *ar;
977 
978     PetscCall(VecGetLocalSize(rr, &s3a));
979     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
980     PetscCall(VecGetArrayRead(rr, &ar));
981     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
982     PetscCall(VecGetArray(mdn->lvec, &r));
983     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
984     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
985     PetscCall(VecRestoreArrayRead(rr, &ar));
986     for (i = 0; i < n; i++) {
987       x = r[i];
988       v = vv + i * lda;
989       for (j = 0; j < m; j++) (*v++) *= x;
990     }
991     PetscCall(VecRestoreArray(mdn->lvec, &r));
992     PetscCall(PetscLogFlops(1.0 * n * m));
993   }
994   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
995   PetscFunctionReturn(PETSC_SUCCESS);
996 }
997 
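/*
   MatDiagonalScale() computes A <- diag(ll) * A * diag(rr), where either vector may be NULL. In
   the implementation above the left scaling is purely local, while the right scaling first
   broadcasts rr into lvec because every process stores all N columns of its rows.
*/
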
998 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
999 {
1000   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1001   PetscInt           i, j;
1002   PetscMPIInt        size;
1003   PetscReal          sum = 0.0;
1004   const PetscScalar *av, *v;
1005 
1006   PetscFunctionBegin;
1007   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1008   v = av;
1009   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1010   if (size == 1) {
1011     PetscCall(MatNorm(mdn->A, type, nrm));
1012   } else {
1013     if (type == NORM_FROBENIUS) {
1014       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1015         sum += PetscRealPart(PetscConj(*v) * (*v));
1016         v++;
1017       }
1018       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1019       *nrm = PetscSqrtReal(*nrm);
1020       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1021     } else if (type == NORM_1) {
1022       PetscReal *tmp;
1023 
1024       PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1025       *nrm = 0.0;
1026       v    = av;
1027       for (j = 0; j < mdn->A->cmap->n; j++) {
1028         for (i = 0; i < mdn->A->rmap->n; i++) {
1029           tmp[j] += PetscAbsScalar(*v);
1030           v++;
1031         }
1032       }
1033       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1034       for (j = 0; j < A->cmap->N; j++) {
1035         if (tmp[j] > *nrm) *nrm = tmp[j];
1036       }
1037       PetscCall(PetscFree(tmp));
1038       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1039     } else if (type == NORM_INFINITY) { /* max row norm */
1040       PetscCall(MatNorm(mdn->A, type, nrm));
1041       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1042     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1043   }
1044   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
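/*
   Reduction pattern used above, written out (a_ij are the locally stored entries):

     NORM_FROBENIUS : ||A||_F   = sqrt( sum over processes of sum_ij |a_ij|^2 )
     NORM_1         : ||A||_1   = max_j ( sum over processes of sum_i |a_ij| )
     NORM_INFINITY  : ||A||_inf = max over processes of the local maximum row sum,
                                  which is exact because each process stores entire rows.
*/
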
1048 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1049 {
1050   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1051   Mat           B;
1052   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1053   PetscInt      j, i, lda;
1054   PetscScalar  *v;
1055 
1056   PetscFunctionBegin;
1057   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1058   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1059     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1060     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1061     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1062     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1063   } else B = *matout;
1064 
1065   m = a->A->rmap->n;
1066   n = a->A->cmap->n;
1067   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
1068   PetscCall(MatDenseGetLDA(a->A, &lda));
1069   PetscCall(PetscMalloc1(m, &rwork));
1070   for (i = 0; i < m; i++) rwork[i] = rstart + i;
1071   for (j = 0; j < n; j++) {
1072     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
1073     v = PetscSafePointerPlusOffset(v, lda);
1074   }
1075   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
1076   PetscCall(PetscFree(rwork));
1077   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1078   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1080     *matout = B;
1081   } else {
1082     PetscCall(MatHeaderMerge(A, &B));
1083   }
1084   PetscFunctionReturn(PETSC_SUCCESS);
1085 }
1086 
1087 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1088 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1089 
1090 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1091 {
1092   PetscFunctionBegin;
1093   PetscCall(PetscLayoutSetUp(A->rmap));
1094   PetscCall(PetscLayoutSetUp(A->cmap));
1095   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1096   PetscFunctionReturn(PETSC_SUCCESS);
1097 }
1098 
1099 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1100 {
1101   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1102 
1103   PetscFunctionBegin;
1104   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1105   PetscFunctionReturn(PETSC_SUCCESS);
1106 }
1107 
1108 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1109 {
1110   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1111 
1112   PetscFunctionBegin;
1113   PetscCall(MatConjugate(a->A));
1114   PetscFunctionReturn(PETSC_SUCCESS);
1115 }
1116 
1117 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1118 {
1119   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1120 
1121   PetscFunctionBegin;
1122   PetscCall(MatRealPart(a->A));
1123   PetscFunctionReturn(PETSC_SUCCESS);
1124 }
1125 
1126 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1127 {
1128   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1129 
1130   PetscFunctionBegin;
1131   PetscCall(MatImaginaryPart(a->A));
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1136 {
1137   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1138 
1139   PetscFunctionBegin;
1140   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1141   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1142   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1143   PetscFunctionReturn(PETSC_SUCCESS);
1144 }
1145 
1146 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1147 
1148 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1149 {
1150   PetscInt      i, m, n;
1151   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1152 
1153   PetscFunctionBegin;
1154   PetscCall(MatGetSize(A, &m, &n));
1155   if (type == REDUCTION_MEAN_REALPART) {
1156     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
1157   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1158     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
1159   } else {
1160     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
1161   }
1162   if (type == NORM_2) {
1163     for (i = 0; i < n; i++) reductions[i] *= reductions[i];
1164   }
1165   if (type == NORM_INFINITY) {
1166     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1167   } else {
1168     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1169   }
1170   if (type == NORM_2) {
1171     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1172   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1173     for (i = 0; i < n; i++) reductions[i] /= m;
1174   }
1175   PetscFunctionReturn(PETSC_SUCCESS);
1176 }
1177 
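/*
   The NORM_2 case above relies on the sequential routine returning per-column 2-norms of the
   local block: these are squared, summed across processes, and square-rooted again, i.e.

     ||A(:,j)||_2 = sqrt( sum over processes of ||A_local(:,j)||_2^2 ),

   while the MEAN reductions are global column sums divided by the global number of rows m.
*/
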
1178 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1179 {
1180   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1181 
1182   PetscFunctionBegin;
1183   PetscCall(MatSetRandom(d->A, rctx));
1184 #if defined(PETSC_HAVE_DEVICE)
1185   x->offloadmask = d->A->offloadmask;
1186 #endif
1187   PetscFunctionReturn(PETSC_SUCCESS);
1188 }
1189 
1190 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1191 {
1192   PetscFunctionBegin;
1193   *missing = PETSC_FALSE;
1194   PetscFunctionReturn(PETSC_SUCCESS);
1195 }
1196 
1197 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1198 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1199 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1200 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1201 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1202 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1203 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1204 
1205 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1206                                        MatGetRow_MPIDense,
1207                                        MatRestoreRow_MPIDense,
1208                                        MatMult_MPIDense,
1209                                        /*  4*/ MatMultAdd_MPIDense,
1210                                        MatMultTranspose_MPIDense,
1211                                        MatMultTransposeAdd_MPIDense,
1212                                        NULL,
1213                                        NULL,
1214                                        NULL,
1215                                        /* 10*/ NULL,
1216                                        NULL,
1217                                        NULL,
1218                                        NULL,
1219                                        MatTranspose_MPIDense,
1220                                        /* 15*/ MatGetInfo_MPIDense,
1221                                        MatEqual_MPIDense,
1222                                        MatGetDiagonal_MPIDense,
1223                                        MatDiagonalScale_MPIDense,
1224                                        MatNorm_MPIDense,
1225                                        /* 20*/ MatAssemblyBegin_MPIDense,
1226                                        MatAssemblyEnd_MPIDense,
1227                                        MatSetOption_MPIDense,
1228                                        MatZeroEntries_MPIDense,
1229                                        /* 24*/ MatZeroRows_MPIDense,
1230                                        NULL,
1231                                        NULL,
1232                                        NULL,
1233                                        NULL,
1234                                        /* 29*/ MatSetUp_MPIDense,
1235                                        NULL,
1236                                        NULL,
1237                                        MatGetDiagonalBlock_MPIDense,
1238                                        NULL,
1239                                        /* 34*/ MatDuplicate_MPIDense,
1240                                        NULL,
1241                                        NULL,
1242                                        NULL,
1243                                        NULL,
1244                                        /* 39*/ MatAXPY_MPIDense,
1245                                        MatCreateSubMatrices_MPIDense,
1246                                        NULL,
1247                                        MatGetValues_MPIDense,
1248                                        MatCopy_MPIDense,
1249                                        /* 44*/ NULL,
1250                                        MatScale_MPIDense,
1251                                        MatShift_MPIDense,
1252                                        NULL,
1253                                        NULL,
1254                                        /* 49*/ MatSetRandom_MPIDense,
1255                                        NULL,
1256                                        NULL,
1257                                        NULL,
1258                                        NULL,
1259                                        /* 54*/ NULL,
1260                                        NULL,
1261                                        NULL,
1262                                        NULL,
1263                                        NULL,
1264                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1265                                        MatDestroy_MPIDense,
1266                                        MatView_MPIDense,
1267                                        NULL,
1268                                        NULL,
1269                                        /* 64*/ NULL,
1270                                        NULL,
1271                                        NULL,
1272                                        NULL,
1273                                        NULL,
1274                                        /* 69*/ NULL,
1275                                        NULL,
1276                                        NULL,
1277                                        NULL,
1278                                        NULL,
1279                                        /* 74*/ NULL,
1280                                        NULL,
1281                                        NULL,
1282                                        NULL,
1283                                        NULL,
1284                                        /* 79*/ NULL,
1285                                        NULL,
1286                                        NULL,
1287                                        NULL,
1288                                        /* 83*/ MatLoad_MPIDense,
1289                                        NULL,
1290                                        NULL,
1291                                        NULL,
1292                                        NULL,
1293                                        NULL,
1294                                        /* 89*/ NULL,
1295                                        NULL,
1296                                        NULL,
1297                                        NULL,
1298                                        NULL,
1299                                        /* 94*/ NULL,
1300                                        NULL,
1301                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1302                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1303                                        NULL,
1304                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1305                                        NULL,
1306                                        NULL,
1307                                        MatConjugate_MPIDense,
1308                                        NULL,
1309                                        /*104*/ NULL,
1310                                        MatRealPart_MPIDense,
1311                                        MatImaginaryPart_MPIDense,
1312                                        NULL,
1313                                        NULL,
1314                                        /*109*/ NULL,
1315                                        NULL,
1316                                        NULL,
1317                                        MatGetColumnVector_MPIDense,
1318                                        MatMissingDiagonal_MPIDense,
1319                                        /*114*/ NULL,
1320                                        NULL,
1321                                        NULL,
1322                                        NULL,
1323                                        NULL,
1324                                        /*119*/ NULL,
1325                                        NULL,
1326                                        MatMultHermitianTranspose_MPIDense,
1327                                        MatMultHermitianTransposeAdd_MPIDense,
1328                                        NULL,
1329                                        /*124*/ NULL,
1330                                        MatGetColumnReductions_MPIDense,
1331                                        NULL,
1332                                        NULL,
1333                                        NULL,
1334                                        /*129*/ NULL,
1335                                        NULL,
1336                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1337                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1338                                        NULL,
1339                                        /*134*/ NULL,
1340                                        NULL,
1341                                        NULL,
1342                                        NULL,
1343                                        NULL,
1344                                        /*139*/ NULL,
1345                                        NULL,
1346                                        NULL,
1347                                        NULL,
1348                                        NULL,
1349                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1350                                        /*145*/ NULL,
1351                                        NULL,
1352                                        NULL,
1353                                        NULL,
1354                                        NULL,
1355                                        /*150*/ NULL,
1356                                        NULL,
1357                                        NULL,
1358                                        NULL,
1359                                        NULL,
1360                                        /*155*/ NULL,
1361                                        NULL};
1362 
1363 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1364 {
1365   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1366   MatType       mtype = MATSEQDENSE;
1367 
1368   PetscFunctionBegin;
1369   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1370   PetscCall(PetscLayoutSetUp(mat->rmap));
1371   PetscCall(PetscLayoutSetUp(mat->cmap));
1372   if (!a->A) {
1373     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1374     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1375   }
1376 #if defined(PETSC_HAVE_CUDA)
1377   PetscBool iscuda;
1378   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1379   if (iscuda) mtype = MATSEQDENSECUDA;
1380 #endif
1381 #if defined(PETSC_HAVE_HIP)
1382   PetscBool iship;
1383   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1384   if (iship) mtype = MATSEQDENSEHIP;
1385 #endif
1386   PetscCall(MatSetType(a->A, mtype));
1387   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1388 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1389   mat->offloadmask = a->A->offloadmask;
1390 #endif
1391   mat->preallocated = PETSC_TRUE;
1392   mat->assembled    = PETSC_TRUE;
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
1396 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1397 {
1398   Mat B, C;
1399 
1400   PetscFunctionBegin;
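  /* form the local rows of A as a sequential dense matrix, then stack the per-process blocks into the parallel dense result */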
1401   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1402   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1403   PetscCall(MatDestroy(&C));
1404   if (reuse == MAT_REUSE_MATRIX) {
1405     C = *newmat;
1406   } else C = NULL;
1407   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1408   PetscCall(MatDestroy(&B));
1409   if (reuse == MAT_INPLACE_MATRIX) {
1410     PetscCall(MatHeaderReplace(A, &C));
1411   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1412   PetscFunctionReturn(PETSC_SUCCESS);
1413 }
1414 
1415 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1416 {
1417   Mat B, C;
1418 
1419   PetscFunctionBegin;
1420   PetscCall(MatDenseGetLocalMatrix(A, &C));
1421   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1422   if (reuse == MAT_REUSE_MATRIX) {
1423     C = *newmat;
1424   } else C = NULL;
1425   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1426   PetscCall(MatDestroy(&B));
1427   if (reuse == MAT_INPLACE_MATRIX) {
1428     PetscCall(MatHeaderReplace(A, &C));
1429   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1430   PetscFunctionReturn(PETSC_SUCCESS);
1431 }
1432 
1433 #if defined(PETSC_HAVE_ELEMENTAL)
1434 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1435 {
1436   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1437   Mat           mat_elemental;
1438   PetscScalar  *v;
1439   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1440 
1441   PetscFunctionBegin;
1442   if (reuse == MAT_REUSE_MATRIX) {
1443     mat_elemental = *newmat;
1444     PetscCall(MatZeroEntries(*newmat));
1445   } else {
1446     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1447     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1448     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1449     PetscCall(MatSetUp(mat_elemental));
1450     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1451   }
1452 
1453   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1454   for (i = 0; i < N; i++) cols[i] = i;
1455   for (i = 0; i < m; i++) rows[i] = rstart + i;
1456 
1457   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1458   PetscCall(MatDenseGetArray(A, &v));
1459   PetscCall(MatDenseGetLDA(a->A, &lda));
1460   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1461   else {
1462     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1463   }
1464   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1465   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1466   PetscCall(MatDenseRestoreArray(A, &v));
1467   PetscCall(PetscFree2(rows, cols));
1468 
1469   if (reuse == MAT_INPLACE_MATRIX) {
1470     PetscCall(MatHeaderReplace(A, &mat_elemental));
1471   } else {
1472     *newmat = mat_elemental;
1473   }
1474   PetscFunctionReturn(PETSC_SUCCESS);
1475 }
1476 #endif
1477 
1478 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1479 {
1480   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1481 
1482   PetscFunctionBegin;
1483   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1484   PetscFunctionReturn(PETSC_SUCCESS);
1485 }
1486 
1487 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1488 {
1489   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1490 
1491   PetscFunctionBegin;
1492   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1493   PetscFunctionReturn(PETSC_SUCCESS);
1494 }
1495 
1496 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1497 {
1498   Mat_MPIDense *mat;
1499   PetscInt      m, nloc, N;
1500 
1501   PetscFunctionBegin;
1502   PetscCall(MatGetSize(inmat, &m, &N));
1503   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1504   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1505     PetscInt sum;
1506 
1507     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1508     /* Check sum(n) = N */
1509     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1510     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1511 
1512     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1513     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1514   }
1515 
1516   /* numeric phase */
1517   mat = (Mat_MPIDense *)(*outmat)->data;
1518   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1519   PetscFunctionReturn(PETSC_SUCCESS);
1520 }
1521 
1522 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1523 {
1524   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1525   PetscInt      lda;
1526 
1527   PetscFunctionBegin;
1528   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1529   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1530   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1531   a->vecinuse = col + 1;
1532   PetscCall(MatDenseGetLDA(a->A, &lda));
1533   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
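  /* no copy is made: the cached column vector is pointed at column col of the local array (offset col*lda) */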
1534   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1535   *v = a->cvec;
1536   PetscFunctionReturn(PETSC_SUCCESS);
1537 }
1538 
1539 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1540 {
1541   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1542 
1543   PetscFunctionBegin;
1544   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1545   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1546   VecCheckAssembled(a->cvec);
1547   a->vecinuse = 0;
1548   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1549   PetscCall(VecResetArray(a->cvec));
1550   if (v) *v = NULL;
1551   PetscFunctionReturn(PETSC_SUCCESS);
1552 }
1553 
1554 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1555 {
1556   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1557   PetscInt      lda;
1558 
1559   PetscFunctionBegin;
1560   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1561   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1562   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1563   a->vecinuse = col + 1;
1564   PetscCall(MatDenseGetLDA(a->A, &lda));
1565   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1566   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1567   PetscCall(VecLockReadPush(a->cvec));
1568   *v = a->cvec;
1569   PetscFunctionReturn(PETSC_SUCCESS);
1570 }
1571 
1572 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1573 {
1574   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1575 
1576   PetscFunctionBegin;
1577   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1578   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1579   VecCheckAssembled(a->cvec);
1580   a->vecinuse = 0;
1581   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1582   PetscCall(VecLockReadPop(a->cvec));
1583   PetscCall(VecResetArray(a->cvec));
1584   if (v) *v = NULL;
1585   PetscFunctionReturn(PETSC_SUCCESS);
1586 }
1587 
1588 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1589 {
1590   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1591   PetscInt      lda;
1592 
1593   PetscFunctionBegin;
1594   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1595   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1596   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1597   a->vecinuse = col + 1;
1598   PetscCall(MatDenseGetLDA(a->A, &lda));
1599   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1600   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1601   *v = a->cvec;
1602   PetscFunctionReturn(PETSC_SUCCESS);
1603 }
1604 
1605 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1606 {
1607   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1608 
1609   PetscFunctionBegin;
1610   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1611   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1612   VecCheckAssembled(a->cvec);
1613   a->vecinuse = 0;
1614   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1615   PetscCall(VecResetArray(a->cvec));
1616   if (v) *v = NULL;
1617   PetscFunctionReturn(PETSC_SUCCESS);
1618 }
1619 
1620 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1621 {
1622   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1623   Mat_MPIDense *c;
1624   MPI_Comm      comm;
1625   PetscInt      pbegin, pend;
1626 
1627   PetscFunctionBegin;
1628   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1629   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1630   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
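  /* clamp the requested global row range [rbegin, rend) to this process's ownership range to obtain local indices */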
1631   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1632   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1633   if (!a->cmat) {
1634     PetscCall(MatCreate(comm, &a->cmat));
1635     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1636     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1637     else {
1638       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1639       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1640       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1641     }
1642     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1643     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1644   } else {
1645     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1646     if (same && a->cmat->rmap->N != A->rmap->N) {
1647       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1648       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1649     }
1650     if (!same) {
1651       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1652       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1653       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1654       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1655       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1656     }
1657     if (cend - cbegin != a->cmat->cmap->N) {
1658       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1659       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1660       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1661       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1662     }
1663   }
1664   c = (Mat_MPIDense *)a->cmat->data;
1665   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1666   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1667 
1668   a->cmat->preallocated = PETSC_TRUE;
1669   a->cmat->assembled    = PETSC_TRUE;
1670 #if defined(PETSC_HAVE_DEVICE)
1671   a->cmat->offloadmask = c->A->offloadmask;
1672 #endif
1673   a->matinuse = cbegin + 1;
1674   *v          = a->cmat;
1675   PetscFunctionReturn(PETSC_SUCCESS);
1676 }
1677 
1678 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1679 {
1680   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1681   Mat_MPIDense *c;
1682 
1683   PetscFunctionBegin;
1684   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1685   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1686   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1687   a->matinuse = 0;
1688   c           = (Mat_MPIDense *)a->cmat->data;
1689   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1690   if (v) *v = NULL;
1691 #if defined(PETSC_HAVE_DEVICE)
1692   A->offloadmask = a->A->offloadmask;
1693 #endif
1694   PetscFunctionReturn(PETSC_SUCCESS);
1695 }
1696 
1697 /*MC
1698    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1699 
1700    Options Database Key:
1701 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1702 
1703   Level: beginner
1704 
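  Note:
  A minimal sketch of selecting this type at runtime (error checking omitted; `comm` and the sizes are assumed to be provided by the caller):
.vb
  Mat A;

  MatCreate(comm, &A);
  MatSetSizes(A, m, n, M, N);
  MatSetFromOptions(A); /* pass -mat_type mpidense on the command line */
  MatSetUp(A);
.ve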
1705 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1706 M*/
1707 PetscErrorCode MatCreate_MPIDense(Mat mat)
1708 {
1709   Mat_MPIDense *a;
1710 
1711   PetscFunctionBegin;
1712   PetscCall(PetscNew(&a));
1713   mat->data   = (void *)a;
1714   mat->ops[0] = MatOps_Values;
1715 
1716   mat->insertmode = NOT_SET_VALUES;
1717 
1718   /* build cache for off-processor entries formed during MatSetValues() */
1719   a->donotstash = PETSC_FALSE;
1720 
1721   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1722 
1723   /* stuff used for matrix vector multiply */
1724   a->lvec        = NULL;
1725   a->Mvctx       = NULL;
1726   a->roworiented = PETSC_TRUE;
1727 
1728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1743   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1749 #if defined(PETSC_HAVE_ELEMENTAL)
1750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1751 #endif
1752 #if defined(PETSC_HAVE_SCALAPACK)
1753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1754 #endif
1755 #if defined(PETSC_HAVE_CUDA)
1756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1757 #endif
1758   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1759   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1760   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1761 #if defined(PETSC_HAVE_CUDA)
1762   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1763   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1764 #endif
1765 #if defined(PETSC_HAVE_HIP)
1766   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1767   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1768   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1769 #endif
1770   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1771   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1772   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1773   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1774   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
1775   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1776   PetscFunctionReturn(PETSC_SUCCESS);
1777 }
1778 
1779 /*MC
1780    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1781 
1782    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1783    and `MATMPIDENSE` otherwise.
1784 
1785    Options Database Key:
1786 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1787 
1788   Level: beginner
1789 
1790 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1791 M*/
1792 
1793 /*@
1794   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1795 
1796   Collective
1797 
1798   Input Parameters:
1799 + B    - the matrix
1800 - data - optional location of matrix data.  Set to `NULL` for PETSc
1801          to control all matrix memory allocation.
1802 
1803   Level: intermediate
1804 
1805   Notes:
1806   The dense format is fully compatible with standard Fortran
1807   storage by columns.
1808 
1809   The data input variable is intended primarily for Fortran programmers
1810   who wish to allocate their own matrix memory space.  Most users should
1811   set `data` to `NULL`.
1812 
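  Example Usage:
  A minimal sketch in which PETSc allocates and manages the storage (error checking omitted; `comm`, `m`, `n`, `M`, and `N` are assumed to be provided by the caller):
.vb
  Mat B;

  MatCreate(comm, &B);
  MatSetSizes(B, m, n, M, N);
  MatSetType(B, MATMPIDENSE);
  MatMPIDenseSetPreallocation(B, NULL);
.ve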
1813 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1814 @*/
1815 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1816 {
1817   PetscFunctionBegin;
1818   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1819   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1820   PetscFunctionReturn(PETSC_SUCCESS);
1821 }
1822 
1823 /*@
1824   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1825   array provided by the user. This is useful to avoid copying an array
1826   into a matrix.
1827 
1828   Not Collective
1829 
1830   Input Parameters:
1831 + mat   - the matrix
1832 - array - the array in column major order
1833 
1834   Level: developer
1835 
1836   Note:
1837   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1838   freed when the matrix is destroyed.
1839 
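  Example Usage:
  A minimal sketch of temporarily wrapping a user-owned buffer (error checking omitted; `mat` is assumed to be an assembled `MATDENSE` matrix and `myarray` a caller-allocated column-major buffer of at least the local size):
.vb
  MatDensePlaceArray(mat, myarray); /* mat now uses myarray as its storage */
  /* ... use mat, e.g. MatMult(mat, x, y) ... */
  MatDenseResetArray(mat);          /* restore the original storage; myarray remains owned by the caller */
.ve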
1840 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1841           `MatDenseReplaceArray()`
1842 @*/
1843 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1844 {
1845   PetscFunctionBegin;
1846   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1847   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1848   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1849 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1850   mat->offloadmask = PETSC_OFFLOAD_CPU;
1851 #endif
1852   PetscFunctionReturn(PETSC_SUCCESS);
1853 }
1854 
1855 /*@
1856   MatDenseResetArray - Resets the matrix array to the array it had before the call to `MatDensePlaceArray()`
1857 
1858   Not Collective
1859 
1860   Input Parameter:
1861 . mat - the matrix
1862 
1863   Level: developer
1864 
1865   Note:
1866   You can only call this after a call to `MatDensePlaceArray()`
1867 
1868 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1869 @*/
1870 PetscErrorCode MatDenseResetArray(Mat mat)
1871 {
1872   PetscFunctionBegin;
1873   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1874   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1875   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1876   PetscFunctionReturn(PETSC_SUCCESS);
1877 }
1878 
1879 /*@
1880   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1881   array provided by the user. This is useful to avoid copying an array
1882   into a matrix
1883 
1884   Not Collective
1885 
1886   Input Parameters:
1887 + mat   - the matrix
1888 - array - the array in column major order
1889 
1890   Level: developer
1891 
1892   Note:
1893   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1894   freed by the user. It will be freed when the matrix is destroyed.
1895 
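  Example Usage:
  A minimal sketch, assuming `mat` is an assembled `MATDENSE` matrix with `nlocal` local rows and `N` global columns (error checking omitted):
.vb
  PetscScalar *newarray;

  PetscMalloc1(nlocal * N, &newarray);
  /* ... fill newarray in column-major order ... */
  MatDenseReplaceArray(mat, newarray); /* mat takes ownership; do not free newarray */
.ve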
1896 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1897 @*/
1898 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1899 {
1900   PetscFunctionBegin;
1901   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1902   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1903   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1904 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1905   mat->offloadmask = PETSC_OFFLOAD_CPU;
1906 #endif
1907   PetscFunctionReturn(PETSC_SUCCESS);
1908 }
1909 
1910 /*@
1911   MatCreateDense - Creates a matrix in `MATDENSE` format.
1912 
1913   Collective
1914 
1915   Input Parameters:
1916 + comm - MPI communicator
1917 . m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1918 . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1919 . M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1920 . N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1921 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1922    to control all matrix memory allocation.
1923 
1924   Output Parameter:
1925 . A - the matrix
1926 
1927   Level: intermediate
1928 
1929   Notes:
1930   The dense format is fully compatible with standard Fortran
1931   storage by columns.
1932 
1933   Although local portions of the matrix are stored in column-major
1934   order, the matrix is partitioned across MPI ranks by row.
1935 
1936   The data input variable is intended primarily for Fortran programmers
1937   who wish to allocate their own matrix memory space.  Most users should
1938   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1939 
1940   The user MUST specify either the local or global matrix dimensions
1941   (possibly both).
1942 
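  Example Usage:
  A minimal sketch creating a 100 x 50 distributed dense matrix with PETSc choosing the row distribution and managing the storage (error checking omitted):
.vb
  Mat A;

  MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A);
  /* ... set entries with MatSetValues() or MatDenseGetArray(), then assemble ... */
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  MatDestroy(&A);
.ve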
1943 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1944 @*/
1945 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1946 {
1947   PetscFunctionBegin;
1948   PetscCall(MatCreate(comm, A));
1949   PetscCall(MatSetSizes(*A, m, n, M, N));
1950   PetscCall(MatSetType(*A, MATDENSE));
1951   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1952   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1953   PetscFunctionReturn(PETSC_SUCCESS);
1954 }
1955 
1956 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1957 {
1958   Mat           mat;
1959   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1960 
1961   PetscFunctionBegin;
1962   *newmat = NULL;
1963   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1964   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1965   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1966   a = (Mat_MPIDense *)mat->data;
1967 
1968   mat->factortype   = A->factortype;
1969   mat->assembled    = PETSC_TRUE;
1970   mat->preallocated = PETSC_TRUE;
1971 
1972   mat->insertmode = NOT_SET_VALUES;
1973   a->donotstash   = oldmat->donotstash;
1974 
1975   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1976   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1977 
1978   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1979 
1980   *newmat = mat;
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1985 {
1986   PetscBool isbinary;
1987 #if defined(PETSC_HAVE_HDF5)
1988   PetscBool ishdf5;
1989 #endif
1990 
1991   PetscFunctionBegin;
1992   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1993   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1994   /* force binary viewer to load .info file if it has not yet done so */
1995   PetscCall(PetscViewerSetUp(viewer));
1996   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1997 #if defined(PETSC_HAVE_HDF5)
1998   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1999 #endif
2000   if (isbinary) {
2001     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2002 #if defined(PETSC_HAVE_HDF5)
2003   } else if (ishdf5) {
2004     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2005 #endif
2006   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2007   PetscFunctionReturn(PETSC_SUCCESS);
2008 }
2009 
2010 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2011 {
2012   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2013   Mat           a, b;
2014 
2015   PetscFunctionBegin;
2016   a = matA->A;
2017   b = matB->A;
2018   PetscCall(MatEqual(a, b, flag));
2019   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2020   PetscFunctionReturn(PETSC_SUCCESS);
2021 }
2022 
2023 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2024 {
2025   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2026 
2027   PetscFunctionBegin;
2028   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2029   PetscCall(MatDestroy(&atb->atb));
2030   PetscCall(PetscFree(atb));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2035 {
2036   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2037 
2038   PetscFunctionBegin;
2039   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2040   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2041   PetscCall(PetscFree(abt));
2042   PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044 
2045 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2046 {
2047   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2048   Mat_TransMatMultDense *atb;
2049   MPI_Comm               comm;
2050   PetscMPIInt            size, *recvcounts;
2051   PetscScalar           *carray, *sendbuf;
2052   const PetscScalar     *atbarray;
2053   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2054   const PetscInt        *ranges;
2055 
2056   PetscFunctionBegin;
2057   MatCheckProduct(C, 3);
2058   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2059   atb        = (Mat_TransMatMultDense *)C->product->data;
2060   recvcounts = atb->recvcounts;
2061   sendbuf    = atb->sendbuf;
2062 
2063   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2064   PetscCallMPI(MPI_Comm_size(comm, &size));
2065 
2066   /* compute atbarray = aseq^T * bseq */
2067   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));
2068 
2069   PetscCall(MatGetOwnershipRanges(C, &ranges));
2070 
2071   /* arrange atbarray into sendbuf */
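  /* entries destined for each process are packed contiguously (column-major within each block), as required by the MPI_Reduce_scatter() below */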
2072   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
2073   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2074   for (proc = 0, k = 0; proc < size; proc++) {
2075     for (j = 0; j < cN; j++) {
2076       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2077     }
2078   }
2079   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2080 
2081   /* sum all atbarray to local values of C */
2082   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
2083   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
2084   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
2085   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2086   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2087   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
2091 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2092 {
2093   MPI_Comm               comm;
2094   PetscMPIInt            size;
2095   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2096   Mat_TransMatMultDense *atb;
2097   PetscBool              cisdense = PETSC_FALSE;
2098   const PetscInt        *ranges;
2099 
2100   PetscFunctionBegin;
2101   MatCheckProduct(C, 4);
2102   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2103   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2104   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2105     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2106   }
2107 
2108   /* create matrix product C */
2109   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2110 #if defined(PETSC_HAVE_CUDA)
2111   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2112 #endif
2113 #if defined(PETSC_HAVE_HIP)
2114   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2115 #endif
2116   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2117   PetscCall(MatSetUp(C));
2118 
2119   /* create data structure for reuse C */
2120   PetscCallMPI(MPI_Comm_size(comm, &size));
2121   PetscCall(PetscNew(&atb));
2122   cM = C->rmap->N;
2123   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2124   PetscCall(MatGetOwnershipRanges(C, &ranges));
2125   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2126   C->product->data    = atb;
2127   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2128   PetscFunctionReturn(PETSC_SUCCESS);
2129 }
2130 
2131 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2132 {
2133   MPI_Comm               comm;
2134   PetscMPIInt            i, size;
2135   PetscInt               maxRows, bufsiz;
2136   PetscMPIInt            tag;
2137   PetscInt               alg;
2138   Mat_MatTransMultDense *abt;
2139   Mat_Product           *product = C->product;
2140   PetscBool              flg;
2141 
2142   PetscFunctionBegin;
2143   MatCheckProduct(C, 4);
2144   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2145   /* check local size of A and B */
2146   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2147 
2148   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2149   alg = flg ? 0 : 1;
2150 
2151   /* setup matrix product C */
2152   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2153   PetscCall(MatSetType(C, MATMPIDENSE));
2154   PetscCall(MatSetUp(C));
2155   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2156 
2157   /* create data structure for reuse C */
2158   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2159   PetscCallMPI(MPI_Comm_size(comm, &size));
2160   PetscCall(PetscNew(&abt));
2161   abt->tag = tag;
2162   abt->alg = alg;
2163   switch (alg) {
2164   case 1: /* alg: "cyclic" */
2165     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2166     bufsiz = A->cmap->N * maxRows;
2167     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2168     break;
2169   default: /* alg: "allgatherv" */
2170     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2171     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2172     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2173     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2174     break;
2175   }
2176 
2177   C->product->data                = abt;
2178   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2179   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2180   PetscFunctionReturn(PETSC_SUCCESS);
2181 }
2182 
2183 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2184 {
2185   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2186   Mat_MatTransMultDense *abt;
2187   MPI_Comm               comm;
2188   PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
2189   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2190   PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
2191   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2192   const PetscScalar     *av, *bv;
2193   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2194   MPI_Request            reqs[2];
2195   const PetscInt        *ranges;
2196 
2197   PetscFunctionBegin;
2198   MatCheckProduct(C, 3);
2199   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2200   abt = (Mat_MatTransMultDense *)C->product->data;
2201   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2202   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2203   PetscCallMPI(MPI_Comm_size(comm, &size));
2204   PetscCall(MatDenseGetArrayRead(a->A, &av));
2205   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2206   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2207   PetscCall(MatDenseGetLDA(a->A, &i));
2208   PetscCall(PetscBLASIntCast(i, &alda));
2209   PetscCall(MatDenseGetLDA(b->A, &i));
2210   PetscCall(PetscBLASIntCast(i, &blda));
2211   PetscCall(MatDenseGetLDA(c->A, &i));
2212   PetscCall(PetscBLASIntCast(i, &clda));
2213   PetscCall(MatGetOwnershipRanges(B, &ranges));
2214   bn = B->rmap->n;
2215   if (blda == bn) {
2216     sendbuf = (PetscScalar *)bv;
2217   } else {
2218     sendbuf = abt->buf[0];
2219     for (k = 0, i = 0; i < cK; i++) {
2220       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2221     }
2222   }
2223   if (size > 1) {
2224     sendto   = (rank + size - 1) % size;
2225     recvfrom = (rank + size + 1) % size;
2226   } else {
2227     sendto = recvfrom = 0;
2228   }
2229   PetscCall(PetscBLASIntCast(cK, &ck));
2230   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2231   recvisfrom = rank;
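  /* ring exchange: in each of the size rounds, multiply the local rows of A against the block of B currently held in sendbuf
     (owned by process recvisfrom) while the next block circulates with nonblocking sends/receives */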
2232   for (i = 0; i < size; i++) {
2233     /* the previous sends and receives (if any) have completed, so the buffers can be read/modified */
2234     PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2235     PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2236 
2237     if (nextrecvisfrom != rank) {
2238       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2239       sendsiz = cK * bn;
2240       recvsiz = cK * nextbn;
2241       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2242       PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2243       PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2244     }
2245 
2246     /* local aseq * sendbuf^T */
2247     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2248     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2249 
2250     if (nextrecvisfrom != rank) {
2251       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2252       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2253     }
2254     bn         = nextbn;
2255     recvisfrom = nextrecvisfrom;
2256     sendbuf    = recvbuf;
2257   }
2258   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2259   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2260   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2261   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2262   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2263   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2264   PetscFunctionReturn(PETSC_SUCCESS);
2265 }
2266 
2267 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2268 {
2269   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2270   Mat_MatTransMultDense *abt;
2271   MPI_Comm               comm;
2272   PetscMPIInt            size, ibn;
2273   PetscScalar           *cv, *sendbuf, *recvbuf;
2274   const PetscScalar     *av, *bv;
2275   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2276   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2277   PetscBLASInt           cm, cn, ck, alda, clda;
2278 
2279   PetscFunctionBegin;
2280   MatCheckProduct(C, 3);
2281   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2282   abt = (Mat_MatTransMultDense *)C->product->data;
2283   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2284   PetscCallMPI(MPI_Comm_size(comm, &size));
2285   PetscCall(MatDenseGetArrayRead(a->A, &av));
2286   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2287   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2288   PetscCall(MatDenseGetLDA(a->A, &i));
2289   PetscCall(PetscBLASIntCast(i, &alda));
2290   PetscCall(MatDenseGetLDA(b->A, &blda));
2291   PetscCall(MatDenseGetLDA(c->A, &i));
2292   PetscCall(PetscBLASIntCast(i, &clda));
2293   /* copy transpose of B into buf[0] */
2294   bn      = B->rmap->n;
2295   sendbuf = abt->buf[0];
2296   recvbuf = abt->buf[1];
2297   for (k = 0, j = 0; j < bn; j++) {
2298     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2299   }
2300   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2301   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2302   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
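  /* recvbuf now holds all of B^T (K x N, column-major with leading dimension K), so the local block of C = A * B^T is a single sequential GEMM */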
2303   PetscCall(PetscBLASIntCast(cK, &ck));
2304   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2305   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2306   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2307   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2308   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2309   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2310   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2311   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2312   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2313   PetscFunctionReturn(PETSC_SUCCESS);
2314 }
2315 
2316 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2317 {
2318   Mat_MatTransMultDense *abt;
2319 
2320   PetscFunctionBegin;
2321   MatCheckProduct(C, 3);
2322   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2323   abt = (Mat_MatTransMultDense *)C->product->data;
2324   switch (abt->alg) {
2325   case 1:
2326     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2327     break;
2328   default:
2329     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2330     break;
2331   }
2332   PetscFunctionReturn(PETSC_SUCCESS);
2333 }
2334 
2335 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2336 {
2337   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2338 
2339   PetscFunctionBegin;
2340   PetscCall(MatDestroy(&ab->Ce));
2341   PetscCall(MatDestroy(&ab->Ae));
2342   PetscCall(MatDestroy(&ab->Be));
2343   PetscCall(PetscFree(ab));
2344   PetscFunctionReturn(PETSC_SUCCESS);
2345 }
2346 
2347 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2348 {
2349   Mat_MatMultDense *ab;
2350   Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
2351 
2352   PetscFunctionBegin;
2353   MatCheckProduct(C, 3);
2354   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2355   ab = (Mat_MatMultDense *)C->product->data;
2356   if (ab->Ae && ab->Ce) {
2357 #if PetscDefined(HAVE_ELEMENTAL)
2358     PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2359     PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2360     PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2361     PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2362 #else
2363     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2364 #endif
2365   } else {
2366     const PetscScalar *read;
2367     PetscScalar       *write;
2368     PetscInt           lda;
2369 
2370     PetscCall(MatDenseGetLDA(B, &lda));
2371     PetscCall(MatDenseGetArrayRead(B, &read));
2372     PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
2373     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
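    /* gather each (row-distributed) column of B into the sequential matrix Be on every process, using the SF built for A's matrix-vector multiply */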
2374     for (PetscInt i = 0; i < C->cmap->N; ++i) {
2375       PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2376       PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2377     }
2378     PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
2379     PetscCall(MatDenseRestoreArrayRead(B, &read));
2380     PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
2381   }
2382   PetscFunctionReturn(PETSC_SUCCESS);
2383 }
2384 
2385 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2386 {
2387   Mat_Product      *product = C->product;
2388   PetscInt          alg;
2389   Mat_MatMultDense *ab;
2390   PetscBool         flg;
2391 
2392   PetscFunctionBegin;
2393   MatCheckProduct(C, 4);
2394   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2395   /* check local size of A and B */
2396   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2397              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
2398 
2399   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2400   alg = flg ? 0 : 1;
2401 
2402   /* setup C */
2403   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2404   PetscCall(MatSetType(C, MATMPIDENSE));
2405   PetscCall(MatSetUp(C));
2406 
2407   /* create data structure for reuse Cdense */
2408   PetscCall(PetscNew(&ab));
2409 
2410   switch (alg) {
2411   case 1: /* alg: "elemental" */
2412 #if PetscDefined(HAVE_ELEMENTAL)
2413     /* create elemental matrices Ae and Be */
2414     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2415     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2416     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2417     PetscCall(MatSetUp(ab->Ae));
2418     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2419 
2420     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2421     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2422     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2423     PetscCall(MatSetUp(ab->Be));
2424     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2425 
2426     /* compute symbolic Ce = Ae*Be */
2427     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2428     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2429 #else
2430     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2431 #endif
2432     break;
2433   default: /* alg: "petsc" */
2434     ab->Ae = NULL;
2435     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2436     ab->Ce = NULL;
2437     break;
2438   }
2439 
2440   C->product->data       = ab;
2441   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2442   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2443   PetscFunctionReturn(PETSC_SUCCESS);
2444 }
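
/*
   Note on the data created above: with the default "petsc" algorithm, ab->Be is a redundant
   sequential dense matrix of global size (rows of B) x (columns of B) held on every rank;
   the numeric phase fills it one column at a time with PetscSFBcast over mdn->Mvctx and then
   performs the purely local product with MatMatMultNumeric_SeqDense_SeqDense(). The
   "elemental" algorithm instead keeps Elemental copies Ae, Be, Ce that are refreshed with
   MatConvert(..., MAT_REUSE_MATRIX, ...) on each numeric call.
*/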
2445 
2446 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2447 {
2448   Mat_Product *product     = C->product;
2449   const char  *algTypes[2] = {"petsc", "elemental"};
2450   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2451   PetscBool    flg = PETSC_FALSE;
2452 
2453   PetscFunctionBegin;
2454   /* Set default algorithm */
2455   alg = 0; /* default is petsc */
2456   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2457   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2458 
2459   /* Get runtime option */
2460   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2461   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2462   PetscOptionsEnd();
2463   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2464 
2465   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2466   C->ops->productsymbolic = MatProductSymbolic_AB;
2467   PetscFunctionReturn(PETSC_SUCCESS);
2468 }
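
/*
   Runtime selection of the algorithm registered above (a sketch; any options prefix attached
   to C applies in front of the option name):

     -mat_product_algorithm petsc       default; PetscSF-based column broadcasts
     -mat_product_algorithm elemental   only offered when PETSc is configured with Elemental

   Without Elemental support the "elemental" value is not in the option list, and the symbolic
   phase additionally guards that path with a SETERRQ().
*/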
2469 
2470 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2471 {
2472   Mat_Product *product = C->product;
2473   Mat          A = product->A, B = product->B;
2474 
2475   PetscFunctionBegin;
2476   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2477              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2478   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2479   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2480   PetscFunctionReturn(PETSC_SUCCESS);
2481 }
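
/*
   Illustrative convenience-API call for the AtB path configured above (A and B are
   placeholder names for assembled MATMPIDENSE matrices with matching row layouts):

     Mat C;
     PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));  (C = A^T B)
     PetscCall(MatTransposeMatMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, &C));    (recompute into the same C)
     PetscCall(MatDestroy(&C));
*/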
2482 
2483 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2484 {
2485   Mat_Product *product     = C->product;
2486   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2487   PetscInt     alg, nalg = 2;
2488   PetscBool    flg = PETSC_FALSE;
2489 
2490   PetscFunctionBegin;
2491   /* Set default algorithm */
2492   alg = 0; /* default is allgatherv */
2493   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2494   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2495 
2496   /* Get runtime option */
2497   if (product->api_user) {
2498     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2499     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2500     PetscOptionsEnd();
2501   } else {
2502     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2503     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2504     PetscOptionsEnd();
2505   }
2506   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2507 
2508   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2509   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2510   PetscFunctionReturn(PETSC_SUCCESS);
2511 }
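
/*
   Illustrative convenience-API call for the ABt path configured above, together with the
   user-level options it honors (A and B are placeholder names):

     Mat C;
     PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));  (C = A B^T)

   The communication scheme defaults to "allgatherv" and can be switched with
     -matmattransmult_mpidense_mpidense_via cyclic   (MatMatTransposeMult() user API)
     -mat_product_algorithm cyclic                   (MatProduct API)
*/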
2512 
2513 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2514 {
2515   Mat_Product *product = C->product;
2516 
2517   PetscFunctionBegin;
2518   switch (product->type) {
2519   case MATPRODUCT_AB:
2520     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2521     break;
2522   case MATPRODUCT_AtB:
2523     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2524     break;
2525   case MATPRODUCT_ABt:
2526     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2527     break;
2528   default:
2529     break;
2530   }
2531   PetscFunctionReturn(PETSC_SUCCESS);
2532 }
2533