xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f0b74427b291237450348b8514d67555ad08bce6)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14   matrix that represents the operator. For sequential matrices it returns itself.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22   Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscAssertPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/* Return a sequential dense matrix that aliases the diagonal block of A (this
   process's rows paired with the matching global columns). The block is created
   once and cached on A under the key "DiagonalBlock"; subsequent calls return
   the cached matrix. Requires congruent row/column layouts. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B shares A's storage, starting at the diagonal block.
       NOTE(review): offset m * rstart assumes the local array has lda == m -- confirm */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing on A keeps a reference to B, so the local reference can be dropped */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/* Insert or add a logically dense sub-block of values: rows owned by this
   process go directly into the local sequential block, off-process rows are
   stashed for communication during assembly (unless donotstash is set).
   Negative row/column indices are silently skipped, per the MatSetValues contract. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major input: row i of v occupies v[i*n .. i*n + n - 1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* column-major input: entry (i, j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it in the stash and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Set the leading dimension of the local dense array, lazily creating the
   sequential local block first if it does not exist yet, with a type matching
   A's (host, CUDA, or HIP). */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE; /* default inner type; may be overridden below */

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* the local block is A->rmap->n x A->cmap->N: each rank stores all columns of its rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
/* Extract the submatrix A(isrow, iscol) into *B. The column index set is
   gathered so every rank sees all requested columns; rows keep the caller's
   local distribution (no parallel redistribution is performed). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points to the start of source column icol[i]; copy the selected rows into
       the destination, which is filled contiguously column by column */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
/* Complete assembly: drain the stash messages received from other ranks,
   insert those entries via MatSetValues_MPIDense (batched per row), then
   assemble the local sequential block. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
/* Zero the listed global rows (indices may refer to rows owned by any rank;
   only locally owned ones are acted on here), optionally fixing the right-hand
   side b so b[r] = diag * x[r] for each zeroed local row, and placing `diag`
   on the diagonal. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows with a zero diagonal, then set the diagonal separately below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with INSERT_VALUES writes `diag` on the whole
       diagonal, not only on the zeroed rows -- confirm this is the intended semantics */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
/* yy = A xx: broadcast the needed entries of xx into the local work vector
   lvec through the communication star forest, then apply the local block. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* build the communication context on first use */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host and device vectors both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); /* local dense multiply */
  PetscFunctionReturn(PETSC_SUCCESS);
}
490 
/* zz = yy + A xx restricted to columns [c_start, c_end) of A: broadcast xx
   into lvec, then dispatch to the local block's column-range multiply-add. */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* build the communication context on first use */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* zz = yy + A xx: broadcast xx into the local work vector lvec, then apply
   the local block's multiply-add. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* build the communication context on first use */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
528 
/* yy = A^H xx using only columns [c_start, c_end): apply the local kernel into
   lvec, then sum (SF reduce) the partial results from all ranks into yy. */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* build the communication context on first use */
  PetscCall(VecSet(yy, 0.0)); /* the reduction below accumulates into yy */
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
548 
/* Shared kernel for yy = A^T xx (herm == PETSC_FALSE) and yy = A^H xx
   (herm == PETSC_TRUE): local transpose product into lvec, then SF-reduce the
   partial sums from all ranks into yy. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* build the communication context on first use */
  PetscCall(VecSet(yy, 0.0)); /* the reduction below accumulates into yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
569 
570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
571 {
572   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
573   const PetscScalar *ax;
574   PetscScalar       *ay;
575   PetscMemType       axmtype, aymtype;
576 
577   PetscFunctionBegin;
578   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
579   PetscCall(VecCopy(yy, zz));
580   PetscMPIInt rank;
581   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
582   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
583   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
584   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
585   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
586   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
587   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
588   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
/* Shared kernel for zz = yy + A^T xx and zz = yy + A^H xx (herm selects
   conjugation): copy yy into zz, local transpose product into lvec, then
   SF-reduce the partial sums from all ranks into zz. */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* build the communication context on first use */
  PetscCall(VecCopy(yy, zz)); /* the reduction below accumulates onto yy's values */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
612 
613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
614 {
615   PetscFunctionBegin;
616   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
617   PetscFunctionReturn(PETSC_SUCCESS);
618 }
619 
620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
621 {
622   PetscFunctionBegin;
623   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
628 {
629   PetscFunctionBegin;
630   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
631   PetscFunctionReturn(PETSC_SUCCESS);
632 }
633 
634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
635 {
636   PetscFunctionBegin;
637   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
638   PetscFunctionReturn(PETSC_SUCCESS);
639 }
640 
/* Extract the global diagonal of A into v (v must have A's row layout).
   Local entries beyond the square part of the local block are zeroed. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  /* number of diagonal entries that fall inside the local block */
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* offset of the first local diagonal entry in the column-major array;
     NOTE(review): uses m while the loop strides by lda -- correct only when lda == m; confirm */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); /* zero-pad the remaining local entries */
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
663 
664 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
665 {
666   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
667 
668   PetscFunctionBegin;
669   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
670   PetscCall(MatStashDestroy_Private(&mat->stash));
671   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
672   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
673   PetscCall(MatDestroy(&mdn->A));
674   PetscCall(VecDestroy(&mdn->lvec));
675   PetscCall(PetscSFDestroy(&mdn->Mvctx));
676   PetscCall(VecDestroy(&mdn->cvec));
677   PetscCall(MatDestroy(&mdn->cmat));
678 
679   PetscCall(PetscFree(mat->data));
680   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
681 
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
694   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
695 #if defined(PETSC_HAVE_ELEMENTAL)
696   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
697 #endif
698 #if defined(PETSC_HAVE_SCALAPACK)
699   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
700 #endif
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
703   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
704 #if defined(PETSC_HAVE_CUDA)
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
722   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
723 #endif
724 #if defined(PETSC_HAVE_HIP)
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
743 #endif
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
757 
758   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 #include <petscdraw.h>
763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
764 {
765   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
766   PetscMPIInt       rank;
767   PetscViewerType   vtype;
768   PetscBool         iascii, isdraw;
769   PetscViewer       sviewer;
770   PetscViewerFormat format;
771 
772   PetscFunctionBegin;
773   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
774   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
775   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
776   if (iascii) {
777     PetscCall(PetscViewerGetType(viewer, &vtype));
778     PetscCall(PetscViewerGetFormat(viewer, &format));
779     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
780       MatInfo info;
781       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
782       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
783       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
784                                                    (PetscInt)info.memory));
785       PetscCall(PetscViewerFlush(viewer));
786       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
787       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
788       PetscFunctionReturn(PETSC_SUCCESS);
789     } else if (format == PETSC_VIEWER_ASCII_INFO) {
790       PetscFunctionReturn(PETSC_SUCCESS);
791     }
792   } else if (isdraw) {
793     PetscDraw draw;
794     PetscBool isnull;
795 
796     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
797     PetscCall(PetscDrawIsNull(draw, &isnull));
798     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
799   }
800 
801   {
802     /* assemble the entire matrix onto first processor. */
803     Mat          A;
804     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
805     PetscInt    *cols;
806     PetscScalar *vals;
807 
808     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
809     if (rank == 0) {
810       PetscCall(MatSetSizes(A, M, N, M, N));
811     } else {
812       PetscCall(MatSetSizes(A, 0, 0, M, N));
813     }
814     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
815     PetscCall(MatSetType(A, MATMPIDENSE));
816     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
817 
818     /* Copy the matrix ... This isn't the most efficient means,
819        but it's quick for now */
820     A->insertmode = INSERT_VALUES;
821 
822     row = mat->rmap->rstart;
823     m   = mdn->A->rmap->n;
824     for (i = 0; i < m; i++) {
825       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
826       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
827       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
828       row++;
829     }
830 
831     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
832     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
833     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
834     if (rank == 0) {
835       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
836       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
837     }
838     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
839     PetscCall(MatDestroy(&A));
840   }
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
845 {
846   PetscBool iascii, isbinary, isdraw, issocket;
847 
848   PetscFunctionBegin;
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
852   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
853 
854   if (iascii || issocket || isdraw) {
855     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
856   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
861 {
862   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
863   Mat            mdn = mat->A;
864   PetscLogDouble isend[5], irecv[5];
865 
866   PetscFunctionBegin;
867   info->block_size = 1.0;
868 
869   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
870 
871   isend[0] = info->nz_used;
872   isend[1] = info->nz_allocated;
873   isend[2] = info->nz_unneeded;
874   isend[3] = info->memory;
875   isend[4] = info->mallocs;
876   if (flag == MAT_LOCAL) {
877     info->nz_used      = isend[0];
878     info->nz_allocated = isend[1];
879     info->nz_unneeded  = isend[2];
880     info->memory       = isend[3];
881     info->mallocs      = isend[4];
882   } else if (flag == MAT_GLOBAL_MAX) {
883     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
884 
885     info->nz_used      = irecv[0];
886     info->nz_allocated = irecv[1];
887     info->nz_unneeded  = irecv[2];
888     info->memory       = irecv[3];
889     info->mallocs      = irecv[4];
890   } else if (flag == MAT_GLOBAL_SUM) {
891     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
892 
893     info->nz_used      = irecv[0];
894     info->nz_allocated = irecv[1];
895     info->nz_unneeded  = irecv[2];
896     info->memory       = irecv[3];
897     info->mallocs      = irecv[4];
898   }
899   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
900   info->fill_ratio_needed = 0;
901   info->factor_mallocs    = 0;
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
905 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
906 {
907   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
908 
909   PetscFunctionBegin;
910   switch (op) {
911   case MAT_NEW_NONZERO_LOCATIONS:
912   case MAT_NEW_NONZERO_LOCATION_ERR:
913   case MAT_NEW_NONZERO_ALLOCATION_ERR:
914     MatCheckPreallocated(A, 1);
915     PetscCall(MatSetOption(a->A, op, flg));
916     break;
917   case MAT_ROW_ORIENTED:
918     MatCheckPreallocated(A, 1);
919     a->roworiented = flg;
920     PetscCall(MatSetOption(a->A, op, flg));
921     break;
922   case MAT_IGNORE_OFF_PROC_ENTRIES:
923     a->donotstash = flg;
924     break;
925   case MAT_SYMMETRIC:
926   case MAT_STRUCTURALLY_SYMMETRIC:
927   case MAT_HERMITIAN:
928   case MAT_SYMMETRY_ETERNAL:
929   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
930   case MAT_SPD:
931   case MAT_SPD_ETERNAL:
932     /* if the diagonal matrix is square it inherits some of the properties above */
933     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
934     break;
935   default:
936     break;
937   }
938   PetscFunctionReturn(PETSC_SUCCESS);
939 }
940 
/*
  Scales A by diag(ll) on the left and diag(rr) on the right (either may be NULL).

  The local block mdn->A holds all global columns for the locally owned rows
  (see MatMPIDenseSetPreallocation_MPIDense), so left scaling is purely local,
  while right scaling first gathers the distributed vector rr into the
  sequential work vector lvec through the Mvctx star forest.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* scale row i by l[i]; consecutive row entries are lda apart (column-major storage) */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the communication pattern, then gather all of rr into lvec */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* scale column i by r[i]; each column is contiguous in memory */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
989 
990 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
991 {
992   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
993   PetscInt           i, j;
994   PetscMPIInt        size;
995   PetscReal          sum = 0.0;
996   const PetscScalar *av, *v;
997 
998   PetscFunctionBegin;
999   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1000   v = av;
1001   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1002   if (size == 1) {
1003     PetscCall(MatNorm(mdn->A, type, nrm));
1004   } else {
1005     if (type == NORM_FROBENIUS) {
1006       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1007         sum += PetscRealPart(PetscConj(*v) * (*v));
1008         v++;
1009       }
1010       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1011       *nrm = PetscSqrtReal(*nrm);
1012       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1013     } else if (type == NORM_1) {
1014       PetscReal *tmp;
1015 
1016       PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1017       *nrm = 0.0;
1018       v    = av;
1019       for (j = 0; j < mdn->A->cmap->n; j++) {
1020         for (i = 0; i < mdn->A->rmap->n; i++) {
1021           tmp[j] += PetscAbsScalar(*v);
1022           v++;
1023         }
1024       }
1025       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1026       for (j = 0; j < A->cmap->N; j++) {
1027         if (tmp[j] > *nrm) *nrm = tmp[j];
1028       }
1029       PetscCall(PetscFree(tmp));
1030       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1031     } else if (type == NORM_INFINITY) { /* max row norm */
1032       PetscCall(MatNorm(mdn->A, type, nrm));
1033       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1034     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1035   }
1036   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1037   PetscFunctionReturn(PETSC_SUCCESS);
1038 }
1039 
/*
  Forms the transpose of a parallel dense matrix.

  Each local column j of A becomes (part of) global row j of B; the entries
  are inserted with MatSetValues using the owned global row indices
  rstart..rstart+m-1 of A as column indices of B, and assembly routes them
  to the owning ranks. For MAT_INPLACE_MATRIX the freshly built transpose
  replaces A's internals via MatHeaderMerge.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create B with row/column layouts swapped relative to A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of A owned by this rank */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* column j of the (column-major) local block is row j of the transpose */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: move B's data into A's header, destroying the old contents */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1078 
1079 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1080 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1081 
1082 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1083 {
1084   PetscFunctionBegin;
1085   PetscCall(PetscLayoutSetUp(A->rmap));
1086   PetscCall(PetscLayoutSetUp(A->cmap));
1087   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1088   PetscFunctionReturn(PETSC_SUCCESS);
1089 }
1090 
1091 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1092 {
1093   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1094 
1095   PetscFunctionBegin;
1096   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1097   PetscFunctionReturn(PETSC_SUCCESS);
1098 }
1099 
1100 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1101 {
1102   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(MatConjugate(a->A));
1106   PetscFunctionReturn(PETSC_SUCCESS);
1107 }
1108 
1109 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1110 {
1111   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(MatRealPart(a->A));
1115   PetscFunctionReturn(PETSC_SUCCESS);
1116 }
1117 
1118 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1119 {
1120   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1121 
1122   PetscFunctionBegin;
1123   PetscCall(MatImaginaryPart(a->A));
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1128 {
1129   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1130 
1131   PetscFunctionBegin;
1132   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1133   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1134   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1139 
/*
  Computes a per-column reduction (norm, sum, or mean) of a parallel dense matrix.

  `type` mixes NormType and MatReductionType values (both small integer enums).
  The sequential kernel produces local per-column values; these are then
  combined across ranks with MPI_MAX (infinity norm) or MPI_SUM (all others).
  Means are computed as sums divided by the global row count; for NORM_2 the
  local values are squared before summing and the square root is taken after
  the reduction (assumes the Seq kernel returns per-column 2-norms — consistent
  with the squaring/sqrt done here).
*/
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* means are obtained by first asking for sums, dividing at the end */
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  /* square local 2-norms so that the SUM reduction yields sums of squares */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* m is the global number of rows */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1169 
1170 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1171 {
1172   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1173 
1174   PetscFunctionBegin;
1175   PetscCall(MatSetRandom(d->A, rctx));
1176 #if defined(PETSC_HAVE_DEVICE)
1177   x->offloadmask = d->A->offloadmask;
1178 #endif
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1183 {
1184   PetscFunctionBegin;
1185   *missing = PETSC_FALSE;
1186   PetscFunctionReturn(PETSC_SUCCESS);
1187 }
1188 
1189 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1190 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1191 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1192 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1193 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1194 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1195 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1196 
/*
  Function dispatch table for MATMPIDENSE. Slots are positional in struct
  _MatOps; the numeric comments mark every fifth slot so entries can be
  matched against the struct definition. NULL means the operation is either
  unsupported or provided through a composed function/plugin instead.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};
1354 
/*
  Allocates (or adopts) the storage of a parallel dense matrix.

  The local sequential block a->A is created with dimensions
  (local rows) x (GLOBAL columns): every rank stores all columns of its
  locally owned rows. `data` may be NULL (PETSc allocates) or a user-provided
  column-major array for the local block. The sequential block type mirrors
  the parallel type (CUDA/HIP variants select the matching device block).
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block: all global columns for the locally owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  /* a dense matrix is fully defined once its storage exists */
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1387 
/* Convert a parallel sparse MATMPIAIJ matrix to MATMPIDENSE: densify each
   rank's local rows sequentially, then concatenate the pieces in parallel.
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C = this rank's rows of A as a sequential AIJ matrix; B = its dense copy */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL; /* INITIAL and INPLACE both build a fresh parallel matrix */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with C's, destroying the old ones */
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1406 
/* Convert a MATMPIDENSE matrix to parallel sparse MATMPIAIJ: sparsify the
   local sequential block, then concatenate across ranks. Mirrors
   MatConvert_MPIAIJ_MPIDense and supports the same three reuse modes. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C is borrowed (not owned) from A, so it is not destroyed here */
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL; /* INITIAL and INPLACE both build a fresh parallel matrix */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1424 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL by inserting each rank's local
   rows with MatSetValues(). Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX,
   and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* zero so that the ADD_VALUES insertions below yield the plain values */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* values are passed column by column below */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global indices of this rank's rows and of all columns */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* columns are not contiguous; insert one column at a time at stride lda */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1469 
1470 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1471 {
1472   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1473 
1474   PetscFunctionBegin;
1475   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1476   PetscFunctionReturn(PETSC_SUCCESS);
1477 }
1478 
1479 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1480 {
1481   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1482 
1483   PetscFunctionBegin;
1484   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1485   PetscFunctionReturn(PETSC_SUCCESS);
1486 }
1487 
/* Stack per-rank sequential dense matrices into one parallel MATMPIDENSE:
   `inmat` supplies this rank's rows, `n` is the local column count (or
   PETSC_DECIDE). With MAT_INITIAL_MATRIX the parallel matrix is created;
   the numeric copy runs in both modes. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1513 
/* Expose column `col` of the matrix as a Vec without copying: the cached
   column vector a->cvec is pointed at the column's storage inside the local
   array. Only one column vector may be outstanding at a time (a->vecinuse). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* col + 1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1530 
1531 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1532 {
1533   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1534 
1535   PetscFunctionBegin;
1536   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1537   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1538   VecCheckAssembled(a->cvec);
1539   a->vecinuse = 0;
1540   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1541   PetscCall(VecResetArray(a->cvec));
1542   if (v) *v = NULL;
1543   PetscFunctionReturn(PETSC_SUCCESS);
1544 }
1545 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): same zero-copy
   mechanism, plus a read lock on the cached vector so callers cannot write. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* col + 1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1563 
/* Return a column vector obtained with MatDenseGetColumnVecRead_MPIDense():
   restore the read-only array, pop the read lock, and detach the vector. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1579 
/* Write-only variant of MatDenseGetColumnVec_MPIDense(): uses
   MatDenseGetArrayWrite() so existing values need not be valid/synced. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* col + 1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1596 
/* Return a column vector obtained with MatDenseGetColumnVecWrite_MPIDense():
   restore the write-only array and detach the cached vector. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1611 
/* Expose the global submatrix [rbegin,rend) x [cbegin,cend) as a cached matrix
   a->cmat that aliases A's storage. The cached matrix and its layouts are
   reused across calls when the requested sizes have not changed. Only one
   submatrix may be outstanding at a time (a->matinuse). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the requested global row range to this rank's ownership range,
     yielding local row indices [pbegin, pend) into a->A */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached submatrix and build its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse path: rebuild layouts only when the requested sizes changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* the local row split must also match on every rank for the cached row layout to be reusable */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the cached matrix's local block aliases the corresponding piece of a->A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* cbegin + 1 so that 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1669 
/* Return the submatrix obtained with MatDenseGetSubMatrix_MPIDense(): release
   the aliased local block and clear the in-use flag; the cached a->cmat shell
   itself is kept for reuse. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
  /* the submatrix may have been modified on a different memory space */
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1688 
1689 /*MC
1690    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1691 
1692    Options Database Key:
1693 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1694 
1695   Level: beginner
1696 
1697 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1698 M*/
/* Constructor for MATMPIDENSE: allocates the implementation data, installs the
   function table, and registers all type-specific method implementations and
   conversions via composed functions. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column-vector/submatrix accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions (optional ones guarded by configure-time flags) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-matrix product dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1770 
1771 /*MC
1772    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1773 
1774    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1775    and `MATMPIDENSE` otherwise.
1776 
1777    Options Database Key:
1778 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1779 
1780   Level: beginner
1781 
1782 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1783 M*/
1784 
1785 /*@
1786   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1787 
1788   Collective
1789 
1790   Input Parameters:
1791 + B    - the matrix
1792 - data - optional location of matrix data.  Set to `NULL` for PETSc
1793          to control all matrix memory allocation.
1794 
1795   Level: intermediate
1796 
1797   Notes:
1798   The dense format is fully compatible with standard Fortran
1799   storage by columns.
1800 
1801   The data input variable is intended primarily for Fortran programmers
1802   who wish to allocate their own matrix memory space.  Most users should
1803   set `data` to `NULL`.
1804 
1805 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1806 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op on matrix types that do not compose
     MatMPIDenseSetPreallocation_C (e.g. sequential dense) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1814 
1815 /*@
1816   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1817   array provided by the user. This is useful to avoid copying an array
1818   into a matrix
1819 
1820   Not Collective
1821 
1822   Input Parameters:
1823 + mat   - the matrix
1824 - array - the array in column major order
1825 
1826   Level: developer
1827 
1828   Note:
1829   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1830 
1831   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1832   freed when the matrix is destroyed.
1833 
1834 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1835           `MatDenseReplaceArray()`
1836 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array is host memory, so the host copy is now the valid one */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1848 
1849 /*@
1850   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1851 
1852   Not Collective
1853 
1854   Input Parameter:
1855 . mat - the matrix
1856 
1857   Level: developer
1858 
1859   Note:
1860   You can only call this after a call to `MatDensePlaceArray()`
1861 
1862 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1863 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* bump the state so cached consumers notice the storage changed back */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1872 
1873 /*@
1874   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1875   array provided by the user. This is useful to avoid copying an array
1876   into a matrix
1877 
1878   Not Collective
1879 
1880   Input Parameters:
1881 + mat   - the matrix
1882 - array - the array in column major order
1883 
1884   Level: developer
1885 
1886   Note:
1887   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1888 
1889   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1890   freed by the user. It will be freed when the matrix is destroyed.
1891 
1892 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1893 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array is host memory, so the host copy is now the valid one */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1905 
1906 /*@
1907   MatCreateDense - Creates a matrix in `MATDENSE` format.
1908 
1909   Collective
1910 
1911   Input Parameters:
1912 + comm - MPI communicator
1913 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1914 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1915 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1916 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1917 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1918    to control all matrix memory allocation.
1919 
1920   Output Parameter:
1921 . A - the matrix
1922 
1923   Level: intermediate
1924 
1925   Notes:
1926   The dense format is fully compatible with standard Fortran
1927   storage by columns.
1928 
1929   Although local portions of the matrix are stored in column-major
1930   order, the matrix is partitioned across MPI ranks by row.
1931 
1932   The data input variable is intended primarily for Fortran programmers
1933   who wish to allocate their own matrix memory space.  Most users should
1934   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1935 
1936   The user MUST specify either the local or global matrix dimensions
1937   (possibly both).
1938 
1939 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1940 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation calls are issued; each dispatches via a composed method,
     so only the one matching the resolved type (seq vs mpi) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1951 
/* Duplicate a MATMPIDENSE matrix: create a new parallel shell sharing A's row
   and column layouts, then duplicate the local sequential block (copying
   values when cpvalues requests it). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1979 
/* Load a MATMPIDENSE matrix from a viewer; binary viewers are always
   supported, HDF5 only when PETSc was configured with it. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2005 
2006 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2007 {
2008   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2009   Mat           a, b;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A;
2013   b = matB->A;
2014   PetscCall(MatEqual(a, b, flag));
2015   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2016   PetscFunctionReturn(PETSC_SUCCESS);
2017 }
2018 
2019 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2020 {
2021   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2022 
2023   PetscFunctionBegin;
2024   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2025   PetscCall(MatDestroy(&atb->atb));
2026   PetscCall(PetscFree(atb));
2027   PetscFunctionReturn(PETSC_SUCCESS);
2028 }
2029 
2030 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2031 {
2032   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2033 
2034   PetscFunctionBegin;
2035   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2036   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2037   PetscCall(PetscFree(abt));
2038   PetscFunctionReturn(PETSC_SUCCESS);
2039 }
2040 
/* Numeric phase of C = A^T * B for parallel dense matrices: each rank forms
   the local partial product aseq^T * bseq (an N_A x N_B contribution), then the
   partial products are summed and distributed row-block-wise to the owning
   ranks with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, pack its row
     block [ranges[proc], ranges[proc+1]) column by column, contiguously */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2086 
2087 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2088 {
2089   MPI_Comm               comm;
2090   PetscMPIInt            size;
2091   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2092   Mat_TransMatMultDense *atb;
2093   PetscBool              cisdense = PETSC_FALSE;
2094   const PetscInt        *ranges;
2095 
2096   PetscFunctionBegin;
2097   MatCheckProduct(C, 4);
2098   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2099   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2100   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2101     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2102   }
2103 
2104   /* create matrix product C */
2105   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2106 #if defined(PETSC_HAVE_CUDA)
2107   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2108 #endif
2109 #if defined(PETSC_HAVE_HIP)
2110   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2111 #endif
2112   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2113   PetscCall(MatSetUp(C));
2114 
2115   /* create data structure for reuse C */
2116   PetscCallMPI(MPI_Comm_size(comm, &size));
2117   PetscCall(PetscNew(&atb));
2118   cM = C->rmap->N;
2119   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2120   PetscCall(MatGetOwnershipRanges(C, &ranges));
2121   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2122   C->product->data    = atb;
2123   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2124   PetscFunctionReturn(PETSC_SUCCESS);
2125 }
2126 
/*
  Symbolic phase of C = A * B^T for MPI dense matrices.

  Sets up C and allocates the communication buffers for one of two numeric
  algorithms selected by the product's algorithm string:
    alg 0 ("allgatherv") - gather all of B^T on every rank, one GEMM;
    alg 1 ("cyclic")     - ring-exchange row blocks of B, one GEMM per block.
  The context is attached to C->product->data.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: both factors must agree on the local column count */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* any algorithm string other than "allgatherv" selects the cyclic variant */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic point-to-point traffic separate from other messages */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each big enough for the largest row block of B (transposed) */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local B^T block to send; buf[1] receives all of B^T.
       Note B->cmap->N == A->cmap->N (checked above via the local column counts) */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    /* displacements follow B's row ownership ranges scaled by the column count */
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2178 
/*
  Numeric phase of C = A * B^T using a ring (cyclic) exchange.

  Each rank starts from its own (transposed) row block of B and, over `size`
  steps, overlaps a nonblocking ring shift of the current block with a GEMM
  that multiplies the local rows of A against the block it currently holds,
  filling the corresponding column slab of C.  The two ping-pong buffers were
  allocated in the symbolic phase; statement order here (pack, post sends,
  GEMM, wait, swap) is what provides the communication/computation overlap,
  so it must not be rearranged.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense blocks, narrowed to BLAS ints */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* B's local block is already contiguous; send it in place without copying */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact B's local block (drop the LDA padding) into the first buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send "down", receive from "up" */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the block in hand initially is our own */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers each step */
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column slab of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* all entries were produced locally; assembly needs no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2262 
2263 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2264 {
2265   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2266   Mat_MatTransMultDense *abt;
2267   MPI_Comm               comm;
2268   PetscMPIInt            size, ibn;
2269   PetscScalar           *cv, *sendbuf, *recvbuf;
2270   const PetscScalar     *av, *bv;
2271   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2272   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2273   PetscBLASInt           cm, cn, ck, alda, clda;
2274 
2275   PetscFunctionBegin;
2276   MatCheckProduct(C, 3);
2277   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2278   abt = (Mat_MatTransMultDense *)C->product->data;
2279   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2280   PetscCallMPI(MPI_Comm_size(comm, &size));
2281   PetscCall(MatDenseGetArrayRead(a->A, &av));
2282   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2283   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2284   PetscCall(MatDenseGetLDA(a->A, &i));
2285   PetscCall(PetscBLASIntCast(i, &alda));
2286   PetscCall(MatDenseGetLDA(b->A, &blda));
2287   PetscCall(MatDenseGetLDA(c->A, &i));
2288   PetscCall(PetscBLASIntCast(i, &clda));
2289   /* copy transpose of B into buf[0] */
2290   bn      = B->rmap->n;
2291   sendbuf = abt->buf[0];
2292   recvbuf = abt->buf[1];
2293   for (k = 0, j = 0; j < bn; j++) {
2294     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2295   }
2296   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2297   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2298   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2299   PetscCall(PetscBLASIntCast(cK, &ck));
2300   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2301   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2302   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2303   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2304   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2305   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2306   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2307   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2308   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2309   PetscFunctionReturn(PETSC_SUCCESS);
2310 }
2311 
2312 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2313 {
2314   Mat_MatTransMultDense *abt;
2315 
2316   PetscFunctionBegin;
2317   MatCheckProduct(C, 3);
2318   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2319   abt = (Mat_MatTransMultDense *)C->product->data;
2320   switch (abt->alg) {
2321   case 1:
2322     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2323     break;
2324   default:
2325     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2326     break;
2327   }
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2332 {
2333   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2334 
2335   PetscFunctionBegin;
2336   PetscCall(MatDestroy(&ab->Ce));
2337   PetscCall(MatDestroy(&ab->Ae));
2338   PetscCall(MatDestroy(&ab->Be));
2339   PetscCall(PetscFree(ab));
2340   PetscFunctionReturn(PETSC_SUCCESS);
2341 }
2342 
/*
  Numeric phase of C = A * B for MPI dense matrices.

  Two paths, selected by what the symbolic phase allocated:
    - Elemental path (ab->Ae/Ce set): convert A and B to MATELEMENTAL, multiply
      there, and convert the result back into C;
    - default "petsc" path: broadcast the needed rows of B to every rank
      column-by-column via A's PetscSF, then multiply the local dense blocks.
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* reuse the Elemental mirrors created in the symbolic phase */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* gather each column of B into the replicated workspace Be via the SF broadcast */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* purely local multiply: C_loc = A_loc * Be */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2380 
2381 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2382 {
2383   Mat_Product      *product = C->product;
2384   PetscInt          alg;
2385   Mat_MatMultDense *ab;
2386   PetscBool         flg;
2387 
2388   PetscFunctionBegin;
2389   MatCheckProduct(C, 4);
2390   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2391   /* check local size of A and B */
2392   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2393              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2394 
2395   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2396   alg = flg ? 0 : 1;
2397 
2398   /* setup C */
2399   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2400   PetscCall(MatSetType(C, MATMPIDENSE));
2401   PetscCall(MatSetUp(C));
2402 
2403   /* create data structure for reuse Cdense */
2404   PetscCall(PetscNew(&ab));
2405 
2406   switch (alg) {
2407   case 1: /* alg: "elemental" */
2408 #if PetscDefined(HAVE_ELEMENTAL)
2409     /* create elemental matrices Ae and Be */
2410     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2411     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2412     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2413     PetscCall(MatSetUp(ab->Ae));
2414     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2415 
2416     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2417     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2418     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2419     PetscCall(MatSetUp(ab->Be));
2420     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2421 
2422     /* compute symbolic Ce = Ae*Be */
2423     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2424     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2425 #else
2426     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2427 #endif
2428     break;
2429   default: /* alg: "petsc" */
2430     ab->Ae = NULL;
2431     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2432     ab->Ce = NULL;
2433     break;
2434   }
2435 
2436   C->product->data       = ab;
2437   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2438   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2439   PetscFunctionReturn(PETSC_SUCCESS);
2440 }
2441 
2442 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2443 {
2444   Mat_Product *product     = C->product;
2445   const char  *algTypes[2] = {"petsc", "elemental"};
2446   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2447   PetscBool    flg = PETSC_FALSE;
2448 
2449   PetscFunctionBegin;
2450   /* Set default algorithm */
2451   alg = 0; /* default is PETSc */
2452   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2453   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2454 
2455   /* Get runtime option */
2456   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2457   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2458   PetscOptionsEnd();
2459   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2460 
2461   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2462   C->ops->productsymbolic = MatProductSymbolic_AB;
2463   PetscFunctionReturn(PETSC_SUCCESS);
2464 }
2465 
2466 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2467 {
2468   Mat_Product *product = C->product;
2469   Mat          A = product->A, B = product->B;
2470 
2471   PetscFunctionBegin;
2472   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2473              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2474   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2475   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2476   PetscFunctionReturn(PETSC_SUCCESS);
2477 }
2478 
2479 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2480 {
2481   Mat_Product *product     = C->product;
2482   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2483   PetscInt     alg, nalg = 2;
2484   PetscBool    flg = PETSC_FALSE;
2485 
2486   PetscFunctionBegin;
2487   /* Set default algorithm */
2488   alg = 0; /* default is allgatherv */
2489   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2490   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2491 
2492   /* Get runtime option */
2493   if (product->api_user) {
2494     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2495     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2496     PetscOptionsEnd();
2497   } else {
2498     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2499     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2500     PetscOptionsEnd();
2501   }
2502   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2503 
2504   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2505   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2506   PetscFunctionReturn(PETSC_SUCCESS);
2507 }
2508 
2509 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2510 {
2511   Mat_Product *product = C->product;
2512 
2513   PetscFunctionBegin;
2514   switch (product->type) {
2515   case MATPRODUCT_AB:
2516     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2517     break;
2518   case MATPRODUCT_AtB:
2519     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2520     break;
2521   case MATPRODUCT_ABt:
2522     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2523     break;
2524   default:
2525     break;
2526   }
2527   PetscFunctionReturn(PETSC_SUCCESS);
2528 }
2529