xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6dd63270497ad23dcf16ae500a87ff2b2a0b7474)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 
/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI case: hand back the per-process sequential block */
  else {
    /* otherwise only the sequential dense base type is acceptable; it is its own local matrix */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
43 
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
/* A += alpha*I: each process updates the diagonal entries of the rows it owns */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for a rectangular matrix, rows at or beyond column count N carry no diagonal entry */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* column-major local block: local row j-rstart of global column j sits at (j-rstart) + j*lda */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/* Return the square on-diagonal block (rows rstart..rend-1, same columns) as a sequential
   matrix sharing storage with the local block; cached on A under the key "DiagonalBlock" */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the local storage starting at column rstart; NOTE(review): assumes lda == m for
       the local block here (offset m*rstart) — preallocation with an external pointer uses
       contiguous columns */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing bumps B's reference count; the MatDestroy below drops our local reference so
       the cached block is owned solely by A and returned as a borrowed reference */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/* Insert/add a logically dense sub-block of values; locally owned rows go straight into the
   sequential block, off-process rows are queued in the stash for communication at assembly */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input block starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* v is column-major: entry (i,j) of the input block is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: mark unassembled and stash the row for MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
/* Retrieve values into the row-major output block v; only locally owned rows are supported */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global -> local row index */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Set the leading dimension, lazily creating the local sequential block if it does not yet
   exist (the local block is n_local x N, i.e. it holds all columns of the owned rows) */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE; /* default inner type; overridden for GPU variants below */

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
/* Extract the submatrix A(isrow, iscol); the column IS is gathered onto every process so each
   process can copy full columns, while the row distribution must match A's own partitioning */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs all selected column indices since it stores full columns locally */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* source column icol[i] starts at v + lda*icol[i]; destination written contiguously,
       which assumes the new matrix's local block has lda == nrows */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
/* Receive stashed off-process entries and insert them, then assemble the local block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i; /* (same value as j - i here, since j == n) */
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
/* Zero the given global rows, optionally set their diagonal to diag and fix up b = diag*x */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows (with zero diagonal); the diagonal is set separately below since for a
     dense matrix the sequential MatZeroRows cannot place diag at the correct global column */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this sets the diagonal of ALL locally owned rows, not only the zeroed
       ones — relies on the usual MatZeroRows usage where remaining rows get diag anyway */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
/* yy = A*xx: broadcast the distributed xx into the local work vector lvec via the star
   forest, then apply the local dense block */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build lvec and the SF */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
490 
/* zz = yy + A[:, c_start:c_end]*xx: gather xx into lvec, then delegate the restricted
   multiply-add to the local block's MatMultAddColumnRange implementation */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* zz = yy + A*xx: gather xx into lvec, then apply the local block's multadd */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
528 
/* yy = A[:, c_start:c_end]^H * xx: local Hermitian-transpose product into lvec, then sum
   the per-process partial results into yy with an SF reduce */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduce accumulates with MPIU_SUM, so start from zero */
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
548 
/* Shared kernel for yy = A^T*xx (herm == PETSC_FALSE) or yy = A^H*xx (herm == PETSC_TRUE):
   local (Hermitian-)transpose product into lvec, then SF-reduce the partial sums into yy */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduce accumulates with MPIU_SUM, so start from zero */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
569 
570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
571 {
572   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
573   const PetscScalar *ax;
574   PetscScalar       *ay;
575   PetscMemType       axmtype, aymtype;
576 
577   PetscFunctionBegin;
578   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
579   PetscCall(VecCopy(yy, zz));
580   PetscMPIInt rank;
581   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
582   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
583   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
584   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
585   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
586   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
587   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
588   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
/* Shared kernel for zz = yy + A^T*xx (herm == PETSC_FALSE) or zz = yy + A^H*xx
   (herm == PETSC_TRUE): seed zz with yy, local product into lvec, SF-reduce into zz */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz)); /* reduce accumulates into zz, so seed it with yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
612 
/* yy = A^T*xx */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
619 
/* zz = yy + A^T*xx */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
626 
/* yy = A^H*xx */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
633 
/* zz = yy + A^H*xx */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
640 
641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
642 {
643   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
644   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
645   PetscScalar       *x;
646   const PetscScalar *av;
647 
648   PetscFunctionBegin;
649   PetscCall(VecGetArray(v, &x));
650   PetscCall(VecGetSize(v, &ng));
651   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
652   PetscCall(VecGetLocalSize(v, &nl));
653   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
654   radd = A->rmap->rstart * m;
655   PetscCall(MatDenseGetArrayRead(a->A, &av));
656   PetscCall(MatDenseGetLDA(a->A, &lda));
657   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
658   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
659   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
660   PetscCall(VecRestoreArray(v, &x));
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
665 {
666   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
667 
668   PetscFunctionBegin;
669   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
670   PetscCall(MatStashDestroy_Private(&mat->stash));
671   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
672   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
673   PetscCall(MatDestroy(&mdn->A));
674   PetscCall(VecDestroy(&mdn->lvec));
675   PetscCall(PetscSFDestroy(&mdn->Mvctx));
676   PetscCall(VecDestroy(&mdn->cvec));
677   PetscCall(MatDestroy(&mdn->cmat));
678 
679   PetscCall(PetscFree(mat->data));
680   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
681 
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
694   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
695 #if defined(PETSC_HAVE_ELEMENTAL)
696   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
697 #endif
698 #if defined(PETSC_HAVE_SCALAPACK)
699   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
700 #endif
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
703   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
704 #if defined(PETSC_HAVE_CUDA)
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
722   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
723 #endif
724 #if defined(PETSC_HAVE_HIP)
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
743 #endif
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
757 
758   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 #include <petscdraw.h>
763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
764 {
765   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
766   PetscMPIInt       rank;
767   PetscViewerType   vtype;
768   PetscBool         iascii, isdraw;
769   PetscViewer       sviewer;
770   PetscViewerFormat format;
771 
772   PetscFunctionBegin;
773   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
774   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
775   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
776   if (iascii) {
777     PetscCall(PetscViewerGetType(viewer, &vtype));
778     PetscCall(PetscViewerGetFormat(viewer, &format));
779     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
780       MatInfo info;
781       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
782       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
783       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
784                                                    (PetscInt)info.memory));
785       PetscCall(PetscViewerFlush(viewer));
786       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
787       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
788       PetscFunctionReturn(PETSC_SUCCESS);
789     } else if (format == PETSC_VIEWER_ASCII_INFO) {
790       PetscFunctionReturn(PETSC_SUCCESS);
791     }
792   } else if (isdraw) {
793     PetscDraw draw;
794     PetscBool isnull;
795 
796     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
797     PetscCall(PetscDrawIsNull(draw, &isnull));
798     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
799   }
800 
801   {
802     /* assemble the entire matrix onto first processor. */
803     Mat          A;
804     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
805     PetscInt    *cols;
806     PetscScalar *vals;
807 
808     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
809     if (rank == 0) {
810       PetscCall(MatSetSizes(A, M, N, M, N));
811     } else {
812       PetscCall(MatSetSizes(A, 0, 0, M, N));
813     }
814     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
815     PetscCall(MatSetType(A, MATMPIDENSE));
816     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
817 
818     /* Copy the matrix ... This isn't the most efficient means,
819        but it's quick for now */
820     A->insertmode = INSERT_VALUES;
821 
822     row = mat->rmap->rstart;
823     m   = mdn->A->rmap->n;
824     for (i = 0; i < m; i++) {
825       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
826       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
827       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
828       row++;
829     }
830 
831     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
832     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
833     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
834     if (rank == 0) {
835       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
836       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
837     }
838     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
839     PetscCall(MatDestroy(&A));
840   }
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
845 {
846   PetscBool iascii, isbinary, isdraw, issocket;
847 
848   PetscFunctionBegin;
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
852   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
853 
854   if (iascii || issocket || isdraw) {
855     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
856   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
861 {
862   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
863   Mat            mdn = mat->A;
864   PetscLogDouble isend[5], irecv[5];
865 
866   PetscFunctionBegin;
867   info->block_size = 1.0;
868 
869   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
870 
871   isend[0] = info->nz_used;
872   isend[1] = info->nz_allocated;
873   isend[2] = info->nz_unneeded;
874   isend[3] = info->memory;
875   isend[4] = info->mallocs;
876   if (flag == MAT_LOCAL) {
877     info->nz_used      = isend[0];
878     info->nz_allocated = isend[1];
879     info->nz_unneeded  = isend[2];
880     info->memory       = isend[3];
881     info->mallocs      = isend[4];
882   } else if (flag == MAT_GLOBAL_MAX) {
883     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
884 
885     info->nz_used      = irecv[0];
886     info->nz_allocated = irecv[1];
887     info->nz_unneeded  = irecv[2];
888     info->memory       = irecv[3];
889     info->mallocs      = irecv[4];
890   } else if (flag == MAT_GLOBAL_SUM) {
891     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
892 
893     info->nz_used      = irecv[0];
894     info->nz_allocated = irecv[1];
895     info->nz_unneeded  = irecv[2];
896     info->memory       = irecv[3];
897     info->mallocs      = irecv[4];
898   }
899   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
900   info->fill_ratio_needed = 0;
901   info->factor_mallocs    = 0;
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
905 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
906 {
907   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
908 
909   PetscFunctionBegin;
910   switch (op) {
911   case MAT_NEW_NONZERO_LOCATIONS:
912   case MAT_NEW_NONZERO_LOCATION_ERR:
913   case MAT_NEW_NONZERO_ALLOCATION_ERR:
914     MatCheckPreallocated(A, 1);
915     PetscCall(MatSetOption(a->A, op, flg));
916     break;
917   case MAT_ROW_ORIENTED:
918     MatCheckPreallocated(A, 1);
919     a->roworiented = flg;
920     PetscCall(MatSetOption(a->A, op, flg));
921     break;
922   case MAT_IGNORE_OFF_PROC_ENTRIES:
923     a->donotstash = flg;
924     break;
925   case MAT_SYMMETRIC:
926   case MAT_STRUCTURALLY_SYMMETRIC:
927   case MAT_HERMITIAN:
928   case MAT_SYMMETRY_ETERNAL:
929   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
930   case MAT_SPD:
931   case MAT_SPD_ETERNAL:
932     /* if the diagonal matrix is square it inherits some of the properties above */
933     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
934     break;
935   default:
936     break;
937   }
938   PetscFunctionReturn(PETSC_SUCCESS);
939 }
940 
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  /* Computes A <- diag(ll) * A * diag(rr).  ll is distributed like the rows of
     A and scales the local rows directly; rr is distributed like the columns
     and is first gathered into the full local column space (lvec) via the
     Mvctx star forest, since each rank's local block stores all columns. */
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    /* left scaling: multiply row i of the local block by ll[i] */
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* start of row i; successive columns are lda apart (column-major) */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    /* right scaling: gather distributed rr into the local work vector, then
       multiply column i of the local block by the gathered value r[i] */
    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the gather SF */
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* start of column i in column-major storage */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
989 
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  /* Compute the Frobenius, 1-, or infinity-norm of a parallel dense matrix by
     combining each rank's local contribution with an Allreduce.
     The 2-norm is not supported in parallel. */
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the local block is the whole matrix */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): this walks the local array contiguously, which assumes
         lda == local row count; confirm for lda-strided storage */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      /* global sum of |a_ij|^2, then square root */
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max column sum: each rank holds all global columns of its local rows,
         so per-column absolute sums add across ranks */
      PetscReal *tmp;

      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are never split across ranks, so the max of local row sums suffices */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1039 
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  /* Transpose by scattering each local column of A as a global row of B with
     MatSetValues(); the stash/assembly machinery routes entries to their
     owning ranks.  For MAT_INPLACE_MATRIX the temporary B replaces A's header. */
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* the transpose swaps the row and column layouts */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local rows */
  for (j = 0; j < n; j++) {
    /* column j of A becomes row j of B; advance by lda to the next column */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in place: B's contents take over A's header, B itself is destroyed */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1078 
1079 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1080 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1081 
1082 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1083 {
1084   PetscFunctionBegin;
1085   PetscCall(PetscLayoutSetUp(A->rmap));
1086   PetscCall(PetscLayoutSetUp(A->cmap));
1087   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1088   PetscFunctionReturn(PETSC_SUCCESS);
1089 }
1090 
1091 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1092 {
1093   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1094 
1095   PetscFunctionBegin;
1096   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1097   PetscFunctionReturn(PETSC_SUCCESS);
1098 }
1099 
1100 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1101 {
1102   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(MatConjugate(a->A));
1106   PetscFunctionReturn(PETSC_SUCCESS);
1107 }
1108 
1109 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1110 {
1111   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(MatRealPart(a->A));
1115   PetscFunctionReturn(PETSC_SUCCESS);
1116 }
1117 
1118 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1119 {
1120   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1121 
1122   PetscFunctionBegin;
1123   PetscCall(MatImaginaryPart(a->A));
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1128 {
1129   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1130 
1131   PetscFunctionBegin;
1132   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1133   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1134   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1139 
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  /* Per-column reductions (norms, sums, means) of a parallel dense matrix.
     `type` carries values from both the NormType and MatReductionType enums,
     compared numerically.  MEAN is computed as a SUM of the local block,
     reduced across ranks, then divided by the global row count. */
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  if (type == NORM_2) {
    /* square the local 2-norms so they sum correctly across ranks */
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    /* undo the squaring after the global sum */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* global sum -> mean over all rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1169 
1170 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1171 {
1172   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1173 
1174   PetscFunctionBegin;
1175   PetscCall(MatSetRandom(d->A, rctx));
1176 #if defined(PETSC_HAVE_DEVICE)
1177   x->offloadmask = d->A->offloadmask;
1178 #endif
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  /* A dense matrix stores every entry explicitly, so a diagonal entry is
     never missing; `d` is left untouched. */
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1188 
1189 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1190 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1191 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1192 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1193 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1194 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1195 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1196 
/* Operation table for MATMPIDENSE.  Slot order is fixed by struct _MatOps;
   the numbered comments index the slots; NULL means the operation is
   unimplemented (or filled in later, e.g. by MatConvert/MatProduct setup). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};
1354 
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  /* Allocate (or, when `data` is non-NULL, adopt) the storage for the local
     dense block a->A, which is sized local-rows by ALL global columns.  The
     local block's sequential type mirrors the parallel type (host/CUDA/HIP). */
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block: mat->rmap->n rows by mat->cmap->N (global) columns */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* mirror the local block's host/device residency on the parallel wrapper */
  mat->offloadmask = a->A->offloadmask;
#endif
  /* dense storage is fully allocated up front, so the matrix counts as assembled */
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1387 
1388 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1389 {
1390   Mat B, C;
1391 
1392   PetscFunctionBegin;
1393   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1394   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1395   PetscCall(MatDestroy(&C));
1396   if (reuse == MAT_REUSE_MATRIX) {
1397     C = *newmat;
1398   } else C = NULL;
1399   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1400   PetscCall(MatDestroy(&B));
1401   if (reuse == MAT_INPLACE_MATRIX) {
1402     PetscCall(MatHeaderReplace(A, &C));
1403   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1404   PetscFunctionReturn(PETSC_SUCCESS);
1405 }
1406 
1407 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1408 {
1409   Mat B, C;
1410 
1411   PetscFunctionBegin;
1412   PetscCall(MatDenseGetLocalMatrix(A, &C));
1413   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1414   if (reuse == MAT_REUSE_MATRIX) {
1415     C = *newmat;
1416   } else C = NULL;
1417   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1418   PetscCall(MatDestroy(&B));
1419   if (reuse == MAT_INPLACE_MATRIX) {
1420     PetscCall(MatHeaderReplace(A, &C));
1421   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1422   PetscFunctionReturn(PETSC_SUCCESS);
1423 }
1424 
1425 #if defined(PETSC_HAVE_ELEMENTAL)
/* Convert an MPI dense matrix to Elemental format by pushing the local row block
   into the Elemental matrix via MatSetValues with ADD_VALUES. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* Zero first because values are accumulated with ADD_VALUES below */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* Local storage is column-major, so hand columns to MatSetValues */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* Index arrays: all global columns, and this rank's owned global rows */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* Non-contiguous columns (lda > m): insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1468 #endif
1469 
1470 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1471 {
1472   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1473 
1474   PetscFunctionBegin;
1475   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1476   PetscFunctionReturn(PETSC_SUCCESS);
1477 }
1478 
1479 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1480 {
1481   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1482 
1483   PetscFunctionBegin;
1484   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1485   PetscFunctionReturn(PETSC_SUCCESS);
1486 }
1487 
1488 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1489 {
1490   Mat_MPIDense *mat;
1491   PetscInt      m, nloc, N;
1492 
1493   PetscFunctionBegin;
1494   PetscCall(MatGetSize(inmat, &m, &N));
1495   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1496   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1497     PetscInt sum;
1498 
1499     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1500     /* Check sum(n) = N */
1501     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1502     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1503 
1504     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1505     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1506   }
1507 
1508   /* numeric phase */
1509   mat = (Mat_MPIDense *)(*outmat)->data;
1510   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1511   PetscFunctionReturn(PETSC_SUCCESS);
1512 }
1513 
/* Expose column `col` of the matrix as a parallel Vec (read/write access).
   Only one column vector may be outstanding at a time; pair with
   MatDenseRestoreColumnVec_MPIDense(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Lazily create the reusable column vector */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored +1 so column 0 still registers as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* Point the vector at column `col` of the column-major local array (leading dimension lda) */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1530 
1531 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1532 {
1533   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1534 
1535   PetscFunctionBegin;
1536   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1537   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1538   VecCheckAssembled(a->cvec);
1539   a->vecinuse = 0;
1540   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1541   PetscCall(VecResetArray(a->cvec));
1542   if (v) *v = NULL;
1543   PetscFunctionReturn(PETSC_SUCCESS);
1544 }
1545 
/* Expose column `col` of the matrix as a read-only parallel Vec.
   Pair with MatDenseRestoreColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Lazily create the reusable column vector */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored +1 so column 0 still registers as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  /* Point the vector at column `col` of the column-major local array (leading dimension lda) */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  /* Lock the vector so callers cannot write through the read-only access */
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1563 
/* Return the read-only column vector obtained from MatDenseGetColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  /* Release the read access and undo the read lock taken in the Get call */
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1579 
/* Expose column `col` of the matrix as a write-only parallel Vec.
   Pair with MatDenseRestoreColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Lazily create the reusable column vector */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored +1 so column 0 still registers as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* Point the vector at column `col` of the column-major local array (leading dimension lda) */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1596 
/* Return the write-only column vector obtained from MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  /* Release the write access taken in the Get call */
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1611 
/* Expose the global submatrix [rbegin, rend) x [cbegin, cend) as an MPI dense matrix
   that aliases A's storage. A single cached submatrix (a->cmat) is reused across calls;
   pair with MatDenseRestoreSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Clamp the requested global row range to this rank's ownership range,
     expressed as local indices [pbegin, pend) into the local block */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* First use: create the cached submatrix with matching type and layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* If all rows are requested, share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* Reuse the cached submatrix; rebuild its layouts only if the shape changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* Same global row count but the local split may differ; all ranks must agree */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* The inner sequential submatrix aliases the local block's storage */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored +1 so column 0 still registers as "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1669 
/* Return the submatrix obtained from MatDenseGetSubMatrix_MPIDense() and release
   the aliased storage of the inner sequential block. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  /* Release the inner sequential submatrix (c->A becomes NULL again) */
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* The submatrix may have been modified on either host or device; resync the mask */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1688 
1689 /*MC
1690    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1691 
1692    Options Database Key:
1693 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1694 
1695   Level: beginner
1696 
1697 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1698 M*/
/* Constructor for MATMPIDENSE: allocates the per-matrix data, installs the
   function table, and composes all type-specific method implementations. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MATMPIDENSE function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* Array/LDA accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* Column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* Type conversions (optional backends guarded by configure-time defines) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* Matrix-product dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  /* Column accessors and column-range multiply kernels */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1770 
1771 /*MC
1772    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1773 
1774    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1775    and `MATMPIDENSE` otherwise.
1776 
1777    Options Database Key:
1778 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1779 
1780   Level: beginner
1781 
1782 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1783 M*/
1784 
1785 /*@
1786   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1787 
1788   Collective
1789 
1790   Input Parameters:
1791 + B    - the matrix
1792 - data - optional location of matrix data.  Set to `NULL` for PETSc
1793          to control all matrix memory allocation.
1794 
1795   Level: intermediate
1796 
1797   Notes:
1798   The dense format is fully compatible with standard Fortran
1799   storage by columns.
1800 
1801   The data input variable is intended primarily for Fortran programmers
1802   who wish to allocate their own matrix memory space.  Most users should
1803   set `data` to `NULL`.
1804 
1805 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1806 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation; a no-op for types that do not compose it */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1814 
1815 /*@
1816   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1817   array provided by the user. This is useful to avoid copying an array
1818   into a matrix
1819 
1820   Not Collective
1821 
1822   Input Parameters:
1823 + mat   - the matrix
1824 - array - the array in column major order
1825 
1826   Level: developer
1827 
1828   Note:
1829   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1830   freed when the matrix is destroyed.
1831 
1832 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1833           `MatDenseReplaceArray()`
1834 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation; errors if the type does not support it */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* The matrix values changed: bump the object state so dependents notice */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* The placed array is host memory, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1846 
1847 /*@
1848   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1849 
1850   Not Collective
1851 
1852   Input Parameter:
1853 . mat - the matrix
1854 
1855   Level: developer
1856 
1857   Note:
1858   You can only call this after a call to `MatDensePlaceArray()`
1859 
1860 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1861 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation; errors if the type does not support it */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* The visible values changed (back to the original array): bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1870 
1871 /*@
1872   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1873   array provided by the user. This is useful to avoid copying an array
1874   into a matrix
1875 
1876   Not Collective
1877 
1878   Input Parameters:
1879 + mat   - the matrix
1880 - array - the array in column major order
1881 
1882   Level: developer
1883 
1884   Note:
1885   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1886   freed by the user. It will be freed when the matrix is destroyed.
1887 
1888 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1889 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* Dispatch to the type-specific implementation; errors if the type does not support it */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* The matrix values changed: bump the object state so dependents notice */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* The replacement array is host memory, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1901 
1902 /*@
1903   MatCreateDense - Creates a matrix in `MATDENSE` format.
1904 
1905   Collective
1906 
1907   Input Parameters:
1908 + comm - MPI communicator
1909 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1910 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1911 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1912 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1913 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1914    to control all matrix memory allocation.
1915 
1916   Output Parameter:
1917 . A - the matrix
1918 
1919   Level: intermediate
1920 
1921   Notes:
1922   The dense format is fully compatible with standard Fortran
1923   storage by columns.
1924 
1925   Although local portions of the matrix are stored in column-major
1926   order, the matrix is partitioned across MPI ranks by row.
1927 
1928   The data input variable is intended primarily for Fortran programmers
1929   who wish to allocate their own matrix memory space.  Most users should
1930   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1931 
1932   The user MUST specify either the local or global matrix dimensions
1933   (possibly both).
1934 
1935 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1936 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* MATDENSE resolves to either the sequential or the MPI variant depending on comm;
     both preallocations are attempted, and the one for the non-matching type is
     presumably a no-op (the MPI variant dispatches via PetscTryMethod — see
     MatMPIDenseSetPreallocation()) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1947 
1948 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1949 {
1950   Mat           mat;
1951   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1952 
1953   PetscFunctionBegin;
1954   *newmat = NULL;
1955   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1956   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1957   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1958   a = (Mat_MPIDense *)mat->data;
1959 
1960   mat->factortype   = A->factortype;
1961   mat->assembled    = PETSC_TRUE;
1962   mat->preallocated = PETSC_TRUE;
1963 
1964   mat->insertmode = NOT_SET_VALUES;
1965   a->donotstash   = oldmat->donotstash;
1966 
1967   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1968   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1969 
1970   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1971 
1972   *newmat = mat;
1973   PetscFunctionReturn(PETSC_SUCCESS);
1974 }
1975 
/* Load an MPI dense matrix from a viewer; supports binary and (if configured) HDF5. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  /* Dispatch on viewer type; anything else is unsupported */
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2001 
2002 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2003 {
2004   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2005   Mat           a, b;
2006 
2007   PetscFunctionBegin;
2008   a = matA->A;
2009   b = matB->A;
2010   PetscCall(MatEqual(a, b, flag));
2011   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2012   PetscFunctionReturn(PETSC_SUCCESS);
2013 }
2014 
2015 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2016 {
2017   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2018 
2019   PetscFunctionBegin;
2020   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2021   PetscCall(MatDestroy(&atb->atb));
2022   PetscCall(PetscFree(atb));
2023   PetscFunctionReturn(PETSC_SUCCESS);
2024 }
2025 
2026 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2027 {
2028   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2029 
2030   PetscFunctionBegin;
2031   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2032   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2033   PetscCall(PetscFree(abt));
2034   PetscFunctionReturn(PETSC_SUCCESS);
2035 }
2036 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank computes its local
   contribution aseq^T * bseq, then the contributions are summed and distributed to
   the owner of each row block of C via MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* Buffers were allocated in the symbolic phase and live in the product context */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, pack that rank's
     row block of every column contiguously (the layout Reduce_scatter expects) */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  /* All values are already local; assembly is a formality */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2082 
2083 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2084 {
2085   MPI_Comm               comm;
2086   PetscMPIInt            size;
2087   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2088   Mat_TransMatMultDense *atb;
2089   PetscBool              cisdense = PETSC_FALSE;
2090   const PetscInt        *ranges;
2091 
2092   PetscFunctionBegin;
2093   MatCheckProduct(C, 4);
2094   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2095   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2096   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2097     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2098   }
2099 
2100   /* create matrix product C */
2101   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2102 #if defined(PETSC_HAVE_CUDA)
2103   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2104 #endif
2105 #if defined(PETSC_HAVE_HIP)
2106   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2107 #endif
2108   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2109   PetscCall(MatSetUp(C));
2110 
2111   /* create data structure for reuse C */
2112   PetscCallMPI(MPI_Comm_size(comm, &size));
2113   PetscCall(PetscNew(&atb));
2114   cM = C->rmap->N;
2115   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2116   PetscCall(MatGetOwnershipRanges(C, &ranges));
2117   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2118   C->product->data    = atb;
2119   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2120   PetscFunctionReturn(PETSC_SUCCESS);
2121 }
2122 
/* Symbolic phase of C = A * B^T for two MPIDENSE matrices.

   Sets up C (row layout of A by row layout of B) and allocates the reusable
   communication buffers for the algorithm selected via product->alg:
   alg 0 = "allgatherv" (gather all of B^T on every rank, one local GEMM),
   alg 1 = "cyclic" (ring exchange of B's row blocks, one GEMM per stage). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A*B^T requires matching local column dimensions */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* any algorithm name other than "allgatherv" selects the cyclic variant */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic ring messages cannot collide with other traffic on C's comm */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" -- send buffer holds local B^T, receive buffer all of B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2174 
/* Numeric phase of C = A * B^T, cyclic (ring) variant.

   The row blocks of B circulate around the ranks in a ring.  At each of the
   `size` stages the rank multiplies its local A rows against the row block of
   B currently held in sendbuf, writing the corresponding column block of the
   local C, while the next block is already in flight (communication overlaps
   the local GEMM).  The two stage buffers abt->buf[0/1] alternate roles. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense arrays, narrowed to BLAS ints */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if the local B is contiguous (lda == local rows) use it directly as the first
     send buffer; otherwise compact it into buf[0] (bn x cK, column-major, ld = bn) */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: blocks travel toward lower ranks, so we receive from rank+1 */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* stage 0 processes our own block */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T: sendbuf is the (cn x cK) row block of B owned by
       recvisfrom, stored column-major with ld = cn; result goes into the column
       block of C starting at ranges[recvisfrom] */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* entries were written locally only */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2258 
2259 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2260 {
2261   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2262   Mat_MatTransMultDense *abt;
2263   MPI_Comm               comm;
2264   PetscMPIInt            size, ibn;
2265   PetscScalar           *cv, *sendbuf, *recvbuf;
2266   const PetscScalar     *av, *bv;
2267   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2268   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2269   PetscBLASInt           cm, cn, ck, alda, clda;
2270 
2271   PetscFunctionBegin;
2272   MatCheckProduct(C, 3);
2273   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2274   abt = (Mat_MatTransMultDense *)C->product->data;
2275   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2276   PetscCallMPI(MPI_Comm_size(comm, &size));
2277   PetscCall(MatDenseGetArrayRead(a->A, &av));
2278   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2279   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2280   PetscCall(MatDenseGetLDA(a->A, &i));
2281   PetscCall(PetscBLASIntCast(i, &alda));
2282   PetscCall(MatDenseGetLDA(b->A, &blda));
2283   PetscCall(MatDenseGetLDA(c->A, &i));
2284   PetscCall(PetscBLASIntCast(i, &clda));
2285   /* copy transpose of B into buf[0] */
2286   bn      = B->rmap->n;
2287   sendbuf = abt->buf[0];
2288   recvbuf = abt->buf[1];
2289   for (k = 0, j = 0; j < bn; j++) {
2290     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2291   }
2292   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2293   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2294   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2295   PetscCall(PetscBLASIntCast(cK, &ck));
2296   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2297   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2298   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2299   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2300   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2301   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2302   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2303   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2304   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2305   PetscFunctionReturn(PETSC_SUCCESS);
2306 }
2307 
2308 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2309 {
2310   Mat_MatTransMultDense *abt;
2311 
2312   PetscFunctionBegin;
2313   MatCheckProduct(C, 3);
2314   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2315   abt = (Mat_MatTransMultDense *)C->product->data;
2316   switch (abt->alg) {
2317   case 1:
2318     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2319     break;
2320   default:
2321     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2322     break;
2323   }
2324   PetscFunctionReturn(PETSC_SUCCESS);
2325 }
2326 
2327 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2328 {
2329   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2330 
2331   PetscFunctionBegin;
2332   PetscCall(MatDestroy(&ab->Ce));
2333   PetscCall(MatDestroy(&ab->Ae));
2334   PetscCall(MatDestroy(&ab->Be));
2335   PetscCall(PetscFree(ab));
2336   PetscFunctionReturn(PETSC_SUCCESS);
2337 }
2338 
/* Numeric phase of C = A * B for two MPIDENSE matrices.

   Two paths, chosen by the symbolic phase: when both ab->Ae and ab->Ce exist
   the product is delegated to Elemental; otherwise ("petsc" algorithm) each
   column of B is broadcast through A's column scatter SF into the sequential
   copy ab->Be, and the local product Aloc * Be is computed with a sequential
   dense GEMM. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* refresh the Elemental mirrors of A and B, multiply there, copy back into C */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* broadcast column i of B (local part at read + i*lda) into column i of the sequential Be */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* local rows of C = Aloc * Be */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2376 
2377 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2378 {
2379   Mat_Product      *product = C->product;
2380   PetscInt          alg;
2381   Mat_MatMultDense *ab;
2382   PetscBool         flg;
2383 
2384   PetscFunctionBegin;
2385   MatCheckProduct(C, 4);
2386   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2387   /* check local size of A and B */
2388   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2389              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2390 
2391   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2392   alg = flg ? 0 : 1;
2393 
2394   /* setup C */
2395   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2396   PetscCall(MatSetType(C, MATMPIDENSE));
2397   PetscCall(MatSetUp(C));
2398 
2399   /* create data structure for reuse Cdense */
2400   PetscCall(PetscNew(&ab));
2401 
2402   switch (alg) {
2403   case 1: /* alg: "elemental" */
2404 #if PetscDefined(HAVE_ELEMENTAL)
2405     /* create elemental matrices Ae and Be */
2406     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2407     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2408     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2409     PetscCall(MatSetUp(ab->Ae));
2410     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2411 
2412     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2413     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2414     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2415     PetscCall(MatSetUp(ab->Be));
2416     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2417 
2418     /* compute symbolic Ce = Ae*Be */
2419     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2420     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2421 #else
2422     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2423 #endif
2424     break;
2425   default: /* alg: "petsc" */
2426     ab->Ae = NULL;
2427     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2428     ab->Ce = NULL;
2429     break;
2430   }
2431 
2432   C->product->data       = ab;
2433   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2434   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2435   PetscFunctionReturn(PETSC_SUCCESS);
2436 }
2437 
2438 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2439 {
2440   Mat_Product *product     = C->product;
2441   const char  *algTypes[2] = {"petsc", "elemental"};
2442   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2443   PetscBool    flg = PETSC_FALSE;
2444 
2445   PetscFunctionBegin;
2446   /* Set default algorithm */
2447   alg = 0; /* default is petsc */
2448   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2449   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2450 
2451   /* Get runtime option */
2452   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2453   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2454   PetscOptionsEnd();
2455   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2456 
2457   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2458   C->ops->productsymbolic = MatProductSymbolic_AB;
2459   PetscFunctionReturn(PETSC_SUCCESS);
2460 }
2461 
2462 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2463 {
2464   Mat_Product *product = C->product;
2465   Mat          A = product->A, B = product->B;
2466 
2467   PetscFunctionBegin;
2468   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2469              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2470   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2471   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2472   PetscFunctionReturn(PETSC_SUCCESS);
2473 }
2474 
2475 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2476 {
2477   Mat_Product *product     = C->product;
2478   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2479   PetscInt     alg, nalg = 2;
2480   PetscBool    flg = PETSC_FALSE;
2481 
2482   PetscFunctionBegin;
2483   /* Set default algorithm */
2484   alg = 0; /* default is allgatherv */
2485   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2486   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2487 
2488   /* Get runtime option */
2489   if (product->api_user) {
2490     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2491     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2492     PetscOptionsEnd();
2493   } else {
2494     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2495     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2496     PetscOptionsEnd();
2497   }
2498   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2499 
2500   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2501   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2502   PetscFunctionReturn(PETSC_SUCCESS);
2503 }
2504 
2505 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2506 {
2507   Mat_Product *product = C->product;
2508 
2509   PetscFunctionBegin;
2510   switch (product->type) {
2511   case MATPRODUCT_AB:
2512     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2513     break;
2514   case MATPRODUCT_AtB:
2515     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2516     break;
2517   case MATPRODUCT_ABt:
2518     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2519     break;
2520   default:
2521     break;
2522   }
2523   PetscFunctionReturn(PETSC_SUCCESS);
2524 }
2525