xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 3db8ccc792d4bd10df0da905a21f07b40f720d8c)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14   matrix that represents the operator. For sequential matrices it returns itself.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22   Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscAssertPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/* Return the square diagonal block of A owned by this process. The block is
   created on first use, cached on A as the composed object "DiagonalBlock",
   and returned thereafter as a borrowed reference (callers must not destroy it). */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* row and column layouts must match so that the diagonal block is square */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the local storage starting at column rstart.
       NOTE(review): the offset m*rstart assumes the local leading dimension equals m — confirm for LDA != m */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* the composed reference keeps B alive; drop our own reference afterwards */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/* Insert/add a logically dense sub-block of values. Entries in locally owned
   rows go straight into the inner sequential matrix; off-process entries are
   stashed for exchange during assembly (unless stashing is disabled). The
   `roworiented` flag selects how the flat array v[] is strided. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major input: row i occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* column-major input: entry (i,j) is v[i + j*m]; insert one at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: queue it in the stash and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Set the leading dimension of the local array. If the inner sequential matrix
   does not exist yet, it is created here (sized n_local x N and matching the
   host/device flavor of A) so the LDA can be recorded before preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds all N columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
/* Extract the submatrix A(isrow, iscol). The column index set is gathered onto
   every process (all ranks need all selected columns); row indices must refer
   to locally owned rows, since no parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at the start of source column icol[i]; the destination is
       filled contiguously (bv advances), i.e. its LDA equals nrows */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
/* Begin assembly: start communicating any stashed off-process entries.
   The assembly type `mode` is not needed at this stage. */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* nothing to exchange if stashing is disabled or no off-process entries were set */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
391 
/* Finish assembly: drain the stash messages started in MatAssemblyBegin,
   inserting the received off-process entries, then assemble the local block. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg is 0 when no more messages are pending */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
/* Zero the given (global) rows, optionally placing `diag` on their diagonal and
   fixing the right-hand side b so that b = diag*x on the zeroed rows. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block with a zero diagonal, then set the diagonal below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with INSERT_VALUES writes `diag` on EVERY
       diagonal entry, not only on the zeroed rows — confirm this matches the
       intended MatZeroRows semantics for dense matrices */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475   const PetscScalar *ax;
476   PetscScalar       *ay;
477   PetscMemType       axmtype, aymtype;
478 
479   PetscFunctionBegin;
480   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
/* z = y + A[:, c_start:c_end]*x: broadcast x into the local work vector, then
   dispatch to the inner matrix's column-range multiply-add method. */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* z = y + A*x: broadcast the needed entries of x into the local work vector,
   then apply the local block's multiply-add. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
528 
/* y = A[:, c_start:c_end]^H * x: apply the local block's column-range Hermitian
   transpose into the work vector, then sum the partial results across processes. */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* yy accumulates reduced contributions, so it must start at zero */
  PetscCall(VecSet(yy, 0.0));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
548 
/* y = A^T*x (or A^H*x when herm is true): apply the local transpose into the
   work vector, then sum the partial products across processes via the SF. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* yy accumulates reduced contributions, so it must start at zero */
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
569 
570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
571 {
572   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
573   const PetscScalar *ax;
574   PetscScalar       *ay;
575   PetscMemType       axmtype, aymtype;
576 
577   PetscFunctionBegin;
578   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
579   PetscCall(VecCopy(yy, zz));
580   PetscMPIInt rank;
581   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
582   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
583   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
584   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
585   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
586   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
587   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
588   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
/* z = y + A^T*x (or A^H*x when herm is true): copy y into z, apply the local
   transpose into the work vector, then sum the partial products into z. */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication pattern */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* z starts as a copy of y; the reduction below adds the product */
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
612 
/* y = A^T*x: plain (non-Hermitian) transpose via the shared kernel. */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
619 
/* z = y + A^T*x: plain (non-Hermitian) transpose via the shared add-kernel. */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
626 
/* y = A^H*x: Hermitian transpose via the shared kernel. */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
633 
/* z = y + A^H*x: Hermitian transpose via the shared add-kernel. */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
640 
641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
642 {
643   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
644   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
645   PetscScalar       *x;
646   const PetscScalar *av;
647 
648   PetscFunctionBegin;
649   PetscCall(VecGetArray(v, &x));
650   PetscCall(VecGetSize(v, &ng));
651   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
652   PetscCall(VecGetLocalSize(v, &nl));
653   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
654   radd = A->rmap->rstart * m;
655   PetscCall(MatDenseGetArrayRead(a->A, &av));
656   PetscCall(MatDenseGetLDA(a->A, &lda));
657   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
658   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
659   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
660   PetscCall(VecRestoreArray(v, &x));
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
665 {
666   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
667 
668   PetscFunctionBegin;
669   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
670   PetscCall(MatStashDestroy_Private(&mat->stash));
671   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
672   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
673   PetscCall(MatDestroy(&mdn->A));
674   PetscCall(VecDestroy(&mdn->lvec));
675   PetscCall(PetscSFDestroy(&mdn->Mvctx));
676   PetscCall(VecDestroy(&mdn->cvec));
677   PetscCall(MatDestroy(&mdn->cmat));
678 
679   PetscCall(PetscFree(mat->data));
680   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
681 
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
694   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
695 #if defined(PETSC_HAVE_ELEMENTAL)
696   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
697 #endif
698 #if defined(PETSC_HAVE_SCALAPACK)
699   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
700 #endif
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
703   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
704 #if defined(PETSC_HAVE_CUDA)
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
722   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
723 #endif
724 #if defined(PETSC_HAVE_HIP)
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
743 #endif
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
757 
758   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 #include <petscdraw.h>
763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
764 {
765   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
766   PetscMPIInt       rank;
767   PetscViewerType   vtype;
768   PetscBool         iascii, isdraw;
769   PetscViewer       sviewer;
770   PetscViewerFormat format;
771 
772   PetscFunctionBegin;
773   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
774   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
775   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
776   if (iascii) {
777     PetscCall(PetscViewerGetType(viewer, &vtype));
778     PetscCall(PetscViewerGetFormat(viewer, &format));
779     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
780       MatInfo info;
781       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
782       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
783       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
784                                                    (PetscInt)info.memory));
785       PetscCall(PetscViewerFlush(viewer));
786       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
787       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
788       PetscFunctionReturn(PETSC_SUCCESS);
789     } else if (format == PETSC_VIEWER_ASCII_INFO) {
790       PetscFunctionReturn(PETSC_SUCCESS);
791     }
792   } else if (isdraw) {
793     PetscDraw draw;
794     PetscBool isnull;
795 
796     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
797     PetscCall(PetscDrawIsNull(draw, &isnull));
798     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
799   }
800 
801   {
802     /* assemble the entire matrix onto first processor. */
803     Mat          A;
804     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
805     PetscInt    *cols;
806     PetscScalar *vals;
807 
808     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
809     if (rank == 0) {
810       PetscCall(MatSetSizes(A, M, N, M, N));
811     } else {
812       PetscCall(MatSetSizes(A, 0, 0, M, N));
813     }
814     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
815     PetscCall(MatSetType(A, MATMPIDENSE));
816     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
817 
818     /* Copy the matrix ... This isn't the most efficient means,
819        but it's quick for now */
820     A->insertmode = INSERT_VALUES;
821 
822     row = mat->rmap->rstart;
823     m   = mdn->A->rmap->n;
824     for (i = 0; i < m; i++) {
825       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
826       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
827       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
828       row++;
829     }
830 
831     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
832     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
833     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
834     if (rank == 0) {
835       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
836       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
837     }
838     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
839     PetscCall(MatDestroy(&A));
840   }
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
845 {
846   PetscBool iascii, isbinary, isdraw, issocket;
847 
848   PetscFunctionBegin;
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
852   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
853 
854   if (iascii || issocket || isdraw) {
855     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
856   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
861 {
862   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
863   Mat            mdn = mat->A;
864   PetscLogDouble isend[5], irecv[5];
865 
866   PetscFunctionBegin;
867   info->block_size = 1.0;
868 
869   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
870 
871   isend[0] = info->nz_used;
872   isend[1] = info->nz_allocated;
873   isend[2] = info->nz_unneeded;
874   isend[3] = info->memory;
875   isend[4] = info->mallocs;
876   if (flag == MAT_LOCAL) {
877     info->nz_used      = isend[0];
878     info->nz_allocated = isend[1];
879     info->nz_unneeded  = isend[2];
880     info->memory       = isend[3];
881     info->mallocs      = isend[4];
882   } else if (flag == MAT_GLOBAL_MAX) {
883     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
884 
885     info->nz_used      = irecv[0];
886     info->nz_allocated = irecv[1];
887     info->nz_unneeded  = irecv[2];
888     info->memory       = irecv[3];
889     info->mallocs      = irecv[4];
890   } else if (flag == MAT_GLOBAL_SUM) {
891     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
892 
893     info->nz_used      = irecv[0];
894     info->nz_allocated = irecv[1];
895     info->nz_unneeded  = irecv[2];
896     info->memory       = irecv[3];
897     info->mallocs      = irecv[4];
898   }
899   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
900   info->fill_ratio_needed = 0;
901   info->factor_mallocs    = 0;
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
/*
  Set an assembly/structure option on a MATMPIDENSE matrix.

  Options fall into four groups: ones forwarded to the local dense block,
  ones explicitly ignored (meaningless for dense storage), the off-process
  stash control, and symmetry flags inherited by a square diagonal block.
  Unknown options raise PETSC_ERR_SUP.
*/
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* Insertion-behavior options: apply to the local dense block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* Remember the orientation for MatSetValues on the parallel wrapper, too */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* Options with no effect on dense storage: logged and ignored */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SUBMAT_SINGLEIS:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* When set, MatSetValues drops entries destined for other ranks */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
949 
/*
  Compute A = diag(ll) * A * diag(rr) in place for MATMPIDENSE.

  Left scaling multiplies local row i by ll[i] (ll is distributed like A's
  rows, so no communication is needed). Right scaling requires the full
  column-scaling vector locally, so rr is broadcast into mdn->lvec via the
  Mvctx star forest before each local column j is multiplied by r[j].
  Either vector may be NULL to skip that side.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* Scale row i: walk across the row with stride lda (column-major storage) */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* Lazily build the scatter context, then gather all entries of rr locally */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* Scale column j: columns are contiguous runs of length m starting at j*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
998 
/*
  Compute a matrix norm of a MATMPIDENSE matrix (collective).

  Supported types: NORM_FROBENIUS, NORM_1 (max column sum), NORM_INFINITY
  (max row sum). NORM_2 is unsupported. With a single rank everything is
  delegated to the sequential block.

  NOTE(review): the Frobenius and 1-norm loops walk the local array as one
  contiguous run of rmap->n * cmap->n entries, i.e. they assume the leading
  dimension equals the local row count — confirm this holds for all paths
  that can reach here (e.g. submatrices with a larger LDA).
*/
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* Sum |a_ij|^2 over the local block, reduce, then take the square root */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* Per-column absolute sums, summed across ranks, then the maximum taken */
      PetscReal *tmp;

      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* Rows are not split across ranks, so the local row norms reduce with MAX */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1048 
/*
  Transpose a MATMPIDENSE matrix.

  reuse == MAT_INITIAL_MATRIX: create *matout with swapped layout.
  reuse == MAT_REUSE_MATRIX:   fill the previously created *matout.
  reuse == MAT_INPLACE_MATRIX: build a temporary and merge it back into A.

  Each local column j of A becomes (part of) global row j of B; the values
  are inserted with MatSetValues so assembly routes them to the owning rank.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B gets A's column layout as rows and vice versa */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of A's local rows = B's column indices */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* Insert column j of the local block (contiguous, length m) as row j of B */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* In-place: replace A's guts with B's while keeping A's header/identity */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1087 
1088 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1089 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1090 
/* Finalize the row/column layouts and, if the user never called a
   preallocation routine, allocate PETSc-managed storage for the matrix. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1099 
1100 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1101 {
1102   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1106   PetscFunctionReturn(PETSC_SUCCESS);
1107 }
1108 
1109 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1110 {
1111   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(MatConjugate(a->A));
1115   PetscFunctionReturn(PETSC_SUCCESS);
1116 }
1117 
1118 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1119 {
1120   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1121 
1122   PetscFunctionBegin;
1123   PetscCall(MatRealPart(a->A));
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1128 {
1129   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1130 
1131   PetscFunctionBegin;
1132   PetscCall(MatImaginaryPart(a->A));
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135 
1136 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1137 {
1138   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1139 
1140   PetscFunctionBegin;
1141   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1142   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1143   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1148 
/*
  Compute a per-column reduction (norm, sum, or mean) for MATMPIDENSE.

  `reductions` has global-column length; each rank fills local partials via
  the sequential kernel and a single Allreduce combines them (MAX for the
  infinity norm, SUM otherwise). Means are computed as sums divided by the
  global row count; 2-norms are squared before the sum-reduce and
  square-rooted afterwards so partial norms combine correctly.
*/
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  /* m, n are the GLOBAL dimensions (used for the mean divisor and array length) */
  PetscCall(MatGetSize(A, &m, &n));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  /* Square local 2-norm partials so that summing across ranks is valid */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1178 
1179 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1180 {
1181   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1182 
1183   PetscFunctionBegin;
1184   PetscCall(MatSetRandom(d->A, rctx));
1185 #if defined(PETSC_HAVE_DEVICE)
1186   x->offloadmask = d->A->offloadmask;
1187 #endif
1188   PetscFunctionReturn(PETSC_SUCCESS);
1189 }
1190 
/* A dense matrix stores every entry, so a diagonal entry can never be
   missing; A and d are intentionally unused. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1197 
1198 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1199 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1200 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1201 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1202 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1203 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1204 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1205 
/*
  Operation dispatch table for MATMPIDENSE. The position of each entry is
  fixed by struct _MatOps; the numeric comments mark every fifth slot, and a
  NULL means the operation is unsupported (or provided elsewhere, e.g. by
  composed functions). Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};
1363 
/* Implementation of MatMPIDenseSetPreallocation(): (lazily) creates and preallocates the
   per-rank sequential dense matrix a->A that stores this rank's rows (all N columns),
   optionally wrapping the caller-provided column-major array `data` instead of allocating. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE; /* default inner type; overridden below for device matrices */

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* Inner matrix is rmap->n x cmap->N: the local rows but the full set of columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* Pick the sequential inner type matching the (possibly GPU-backed) outer type */
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* Keep the outer matrix's host/device validity flag in sync with the inner matrix */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1396 
1397 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1398 {
1399   Mat B, C;
1400 
1401   PetscFunctionBegin;
1402   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1403   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1404   PetscCall(MatDestroy(&C));
1405   if (reuse == MAT_REUSE_MATRIX) {
1406     C = *newmat;
1407   } else C = NULL;
1408   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1409   PetscCall(MatDestroy(&B));
1410   if (reuse == MAT_INPLACE_MATRIX) {
1411     PetscCall(MatHeaderReplace(A, &C));
1412   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1413   PetscFunctionReturn(PETSC_SUCCESS);
1414 }
1415 
1416 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1417 {
1418   Mat B, C;
1419 
1420   PetscFunctionBegin;
1421   PetscCall(MatDenseGetLocalMatrix(A, &C));
1422   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1423   if (reuse == MAT_REUSE_MATRIX) {
1424     C = *newmat;
1425   } else C = NULL;
1426   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1427   PetscCall(MatDestroy(&B));
1428   if (reuse == MAT_INPLACE_MATRIX) {
1429     PetscCall(MatHeaderReplace(A, &C));
1430   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1431   PetscFunctionReturn(PETSC_SUCCESS);
1432 }
1433 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format by inserting each rank's local
   rows into a (possibly reused) Elemental matrix via MatSetValues(). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* Entries are added below (ADD_VALUES), so the reused matrix must start from zero */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* Local dense storage is column-major, so insert column-oriented */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* Index arrays: all global columns, and this rank's contiguous global row range */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* Leading dimension exceeds the row count: columns are not contiguous, insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1478 
1479 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1480 {
1481   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1482 
1483   PetscFunctionBegin;
1484   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1485   PetscFunctionReturn(PETSC_SUCCESS);
1486 }
1487 
1488 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1489 {
1490   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1491 
1492   PetscFunctionBegin;
1493   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
/* Stack one sequential dense matrix per rank (row-wise) into a parallel dense matrix.
   Collective: every rank contributes `inmat` as its local rows; `n` is the desired number
   of local columns of the result (PETSC_DECIDE to let PETSc choose). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* m local rows per rank; global row count is determined by the sum over ranks */
    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  /* inmat becomes this rank's local block of the parallel matrix */
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1522 
1523 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1524 {
1525   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1526   PetscInt      lda;
1527 
1528   PetscFunctionBegin;
1529   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1530   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1531   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1532   a->vecinuse = col + 1;
1533   PetscCall(MatDenseGetLDA(a->A, &lda));
1534   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1535   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1536   *v = a->cvec;
1537   PetscFunctionReturn(PETSC_SUCCESS);
1538 }
1539 
/* Release the column Vec obtained with MatDenseGetColumnVec_MPIDense(): give the array
   back to the inner matrix, detach it from the cached vector, and clear the in-use flag. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL; /* invalidate the caller's handle */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1554 
1555 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1556 {
1557   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1558   PetscInt      lda;
1559 
1560   PetscFunctionBegin;
1561   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1562   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1563   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1564   a->vecinuse = col + 1;
1565   PetscCall(MatDenseGetLDA(a->A, &lda));
1566   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1567   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1568   PetscCall(VecLockReadPush(a->cvec));
1569   *v = a->cvec;
1570   PetscFunctionReturn(PETSC_SUCCESS);
1571 }
1572 
/* Release the read-only column Vec obtained with MatDenseGetColumnVecRead_MPIDense():
   return the array, pop the read lock, detach the array, and clear the in-use flag. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL; /* invalidate the caller's handle */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1588 
1589 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1590 {
1591   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1592   PetscInt      lda;
1593 
1594   PetscFunctionBegin;
1595   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1596   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1597   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1598   a->vecinuse = col + 1;
1599   PetscCall(MatDenseGetLDA(a->A, &lda));
1600   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1601   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1602   *v = a->cvec;
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
/* Release the write-only column Vec obtained with MatDenseGetColumnVecWrite_MPIDense():
   return the array to the inner matrix, detach it, and clear the in-use flag. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL; /* invalidate the caller's handle */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1620 
/* Return a view (no copy) of the contiguous submatrix A[rbegin:rend, cbegin:cend] as a
   cached parallel dense matrix a->cmat whose inner matrix aliases a->A's storage.
   Must be matched by MatDenseRestoreSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend; /* intersection of [rbegin,rend) with this rank's rows, in local indices */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Clip the requested global row range against this rank's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* First request: create the cached view matrix and set up its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* Full row range requested: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* Cached view exists: rebuild its layouts only if the requested sizes changed.
       The agreement check is collective since local sizes may differ across ranks. */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Inner matrix of the view aliases the corresponding rows/columns of a->A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored 1-based so 0 can mean "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1678 
/* Release the submatrix view obtained with MatDenseGetSubMatrix_MPIDense(): return the
   inner view to a->A, clear the in-use flag, and invalidate the caller's handle. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* The view may have modified (device) data; resync A's validity flag */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1697 
1698 /*MC
1699    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1700 
1701    Options Database Key:
1702 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1703 
1704   Level: beginner
1705 
1706 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1707 M*/
/* Type constructor for MATMPIDENSE: allocates the per-type data structure, installs the
   function table, and registers all composed-method and conversion/product callbacks. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* struct copy of the static function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* Dense-array access and column/submatrix view methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* Conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* Matrix-product dispatch for AIJ (and device-sparse) partners */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1779 
1780 /*MC
1781    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1782 
1783    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1784    and `MATMPIDENSE` otherwise.
1785 
1786    Options Database Key:
1787 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1788 
1789   Level: beginner
1790 
1791 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1792 M*/
1793 
1794 /*@
1795   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1796 
1797   Collective
1798 
1799   Input Parameters:
1800 + B    - the matrix
1801 - data - optional location of matrix data.  Set to `NULL` for PETSc
1802          to control all matrix memory allocation.
1803 
1804   Level: intermediate
1805 
1806   Notes:
1807   The dense format is fully compatible with standard Fortran
1808   storage by columns.
1809 
1810   The data input variable is intended primarily for Fortran programmers
1811   who wish to allocate their own matrix memory space.  Most users should
1812   set `data` to `NULL`.
1813 
1814 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1815 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* Dispatches to the implementation composed on B (no-op for types without one) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1823 
1824 /*@
1825   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1826   array provided by the user. This is useful to avoid copying an array
1827   into a matrix
1828 
1829   Not Collective
1830 
1831   Input Parameters:
1832 + mat   - the matrix
1833 - array - the array in column major order
1834 
1835   Level: developer
1836 
1837   Note:
1838   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1839   freed when the matrix is destroyed.
1840 
1841 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1842           `MatDenseReplaceArray()`
1843 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* Values changed underneath the matrix; bump state so cached consumers refresh */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* NOTE(review): placed array is assumed to be host memory, hence CPU-valid mask */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1855 
1856 /*@
  MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
1858 
1859   Not Collective
1860 
1861   Input Parameter:
1862 . mat - the matrix
1863 
1864   Level: developer
1865 
1866   Note:
1867   You can only call this after a call to `MatDensePlaceArray()`
1868 
1869 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1870 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* Backing storage changed; bump state so cached consumers refresh */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1879 
1880 /*@
1881   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1882   array provided by the user. This is useful to avoid copying an array
1883   into a matrix
1884 
1885   Not Collective
1886 
1887   Input Parameters:
1888 + mat   - the matrix
1889 - array - the array in column major order
1890 
1891   Level: developer
1892 
1893   Note:
1894   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1895   freed by the user. It will be freed when the matrix is destroyed.
1896 
1897 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1898 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* Values changed underneath the matrix; bump state so cached consumers refresh */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* NOTE(review): replacement array is assumed to be host memory, hence CPU-valid mask */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1910 
1911 /*@
1912   MatCreateDense - Creates a matrix in `MATDENSE` format.
1913 
1914   Collective
1915 
1916   Input Parameters:
1917 + comm - MPI communicator
1918 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1919 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1920 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1921 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1922 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1923    to control all matrix memory allocation.
1924 
1925   Output Parameter:
1926 . A - the matrix
1927 
1928   Level: intermediate
1929 
1930   Notes:
1931   The dense format is fully compatible with standard Fortran
1932   storage by columns.
1933 
1934   Although local portions of the matrix are stored in column-major
1935   order, the matrix is partitioned across MPI ranks by row.
1936 
1937   The data input variable is intended primarily for Fortran programmers
1938   who wish to allocate their own matrix memory space.  Most users should
1939   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1940 
1941   The user MUST specify either the local or global matrix dimensions
1942   (possibly both).
1943 
1944 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1945 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* Both preallocation calls are issued; each dispatches via a composed method, so only
     the one matching the actual type (seq vs mpi) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1956 
1957 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1958 {
1959   Mat           mat;
1960   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1961 
1962   PetscFunctionBegin;
1963   *newmat = NULL;
1964   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1965   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1966   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1967   a = (Mat_MPIDense *)mat->data;
1968 
1969   mat->factortype   = A->factortype;
1970   mat->assembled    = PETSC_TRUE;
1971   mat->preallocated = PETSC_TRUE;
1972 
1973   mat->insertmode = NOT_SET_VALUES;
1974   a->donotstash   = oldmat->donotstash;
1975 
1976   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1977   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1978 
1979   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1980 
1981   *newmat = mat;
1982   PetscFunctionReturn(PETSC_SUCCESS);
1983 }
1984 
/* Load a parallel dense matrix from a viewer; binary and (when available) HDF5 viewers
   are supported, anything else errors out. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2010 
2011 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2012 {
2013   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2014   Mat           a, b;
2015 
2016   PetscFunctionBegin;
2017   a = matA->A;
2018   b = matB->A;
2019   PetscCall(MatEqual(a, b, flag));
2020   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2021   PetscFunctionReturn(PETSC_SUCCESS);
2022 }
2023 
/* Destructor for the A^T*B product data attached to C->product: frees the
   reduce-scatter buffers and the cached local intermediate product. */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2034 
/* Destructor for the A*B^T product data attached to C->product: frees the
   double-buffered work arrays and the gather count/displacement arrays. */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2045 
/* Numeric phase of C = A^T * B for parallel dense A, B: each rank forms the full-size
   local product a->A^T * b->A, packs it by destination rank, and MPI_Reduce_scatter
   sums the contributions while distributing each rank its block of rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* Buffers were sized in the symbolic phase and stashed on the product */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf */
  /* Packing order: for each destination rank, all cN columns of that rank's row range,
     matching the contiguous blocks MPI_Reduce_scatter hands to each rank */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2091 
2092 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2093 {
2094   MPI_Comm               comm;
2095   PetscMPIInt            size;
2096   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2097   Mat_TransMatMultDense *atb;
2098   PetscBool              cisdense = PETSC_FALSE;
2099   const PetscInt        *ranges;
2100 
2101   PetscFunctionBegin;
2102   MatCheckProduct(C, 4);
2103   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2104   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2105   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2106     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2107   }
2108 
2109   /* create matrix product C */
2110   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2111 #if defined(PETSC_HAVE_CUDA)
2112   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2113 #endif
2114 #if defined(PETSC_HAVE_HIP)
2115   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2116 #endif
2117   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2118   PetscCall(MatSetUp(C));
2119 
2120   /* create data structure for reuse C */
2121   PetscCallMPI(MPI_Comm_size(comm, &size));
2122   PetscCall(PetscNew(&atb));
2123   cM = C->rmap->N;
2124   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2125   PetscCall(MatGetOwnershipRanges(C, &ranges));
2126   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2127   C->product->data    = atb;
2128   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2129   PetscFunctionReturn(PETSC_SUCCESS);
2130 }
2131 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sizes C and allocates the
   communication buffers for the algorithm selected via product->alg
   ("allgatherv", the default, or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic algorithm's messages cannot collide with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each large enough for the biggest row block of B
       (times the shared column count) that travels around the ring */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0]: this rank's transposed block of B; buf[1]: the gathered transpose of all of B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    /* displacements/counts are in scalars: rank i's block starts at range[i]*N */
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2183 
/* Numeric phase of C = A * B^T using a ring ("cyclic") communication pattern:
   each rank's transposed local block of B circulates around the ranks; at every
   step the local block of A is multiplied against the block currently held,
   filling the corresponding column block of C, while the next block is already
   in flight (communication/computation overlap via nonblocking send/recv). */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* the first block sent is this rank's local B, stored bn x cK column-major;
     when the local storage is already contiguous (lda == bn) it is sent in place,
     otherwise it is compacted into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* previous round's communication (if any) has completed, so both buffers may be reused */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so the one being sent is never overwritten */
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T; sendbuf is cn x ck column-major (ld = cn), writing
       the column block of C owned by rank recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* all entries were produced locally, so assembly needs no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2267 
2268 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2269 {
2270   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2271   Mat_MatTransMultDense *abt;
2272   MPI_Comm               comm;
2273   PetscMPIInt            size, ibn;
2274   PetscScalar           *cv, *sendbuf, *recvbuf;
2275   const PetscScalar     *av, *bv;
2276   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2277   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2278   PetscBLASInt           cm, cn, ck, alda, clda;
2279 
2280   PetscFunctionBegin;
2281   MatCheckProduct(C, 3);
2282   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2283   abt = (Mat_MatTransMultDense *)C->product->data;
2284   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2285   PetscCallMPI(MPI_Comm_size(comm, &size));
2286   PetscCall(MatDenseGetArrayRead(a->A, &av));
2287   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2288   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2289   PetscCall(MatDenseGetLDA(a->A, &i));
2290   PetscCall(PetscBLASIntCast(i, &alda));
2291   PetscCall(MatDenseGetLDA(b->A, &blda));
2292   PetscCall(MatDenseGetLDA(c->A, &i));
2293   PetscCall(PetscBLASIntCast(i, &clda));
2294   /* copy transpose of B into buf[0] */
2295   bn      = B->rmap->n;
2296   sendbuf = abt->buf[0];
2297   recvbuf = abt->buf[1];
2298   for (k = 0, j = 0; j < bn; j++) {
2299     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2300   }
2301   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2302   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2303   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2304   PetscCall(PetscBLASIntCast(cK, &ck));
2305   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2306   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2307   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2308   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2309   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2310   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2311   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2312   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2313   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2314   PetscFunctionReturn(PETSC_SUCCESS);
2315 }
2316 
2317 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2318 {
2319   Mat_MatTransMultDense *abt;
2320 
2321   PetscFunctionBegin;
2322   MatCheckProduct(C, 3);
2323   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2324   abt = (Mat_MatTransMultDense *)C->product->data;
2325   switch (abt->alg) {
2326   case 1:
2327     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2328     break;
2329   default:
2330     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2331     break;
2332   }
2333   PetscFunctionReturn(PETSC_SUCCESS);
2334 }
2335 
2336 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2337 {
2338   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2339 
2340   PetscFunctionBegin;
2341   PetscCall(MatDestroy(&ab->Ce));
2342   PetscCall(MatDestroy(&ab->Ae));
2343   PetscCall(MatDestroy(&ab->Be));
2344   PetscCall(PetscFree(ab));
2345   PetscFunctionReturn(PETSC_SUCCESS);
2346 }
2347 
/* Numeric phase of C = A * B for MPI dense matrices. Two paths:
   - Elemental (ab->Ae and ab->Ce were created by the symbolic phase): convert A
     and B to Elemental form, multiply there, convert the result back into C;
   - "petsc" algorithm: replicate B into the sequential matrix Be via the column
     scatter SF, then multiply the local blocks with a sequential dense product. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    /* Ae/Ce can only have been created when Elemental was configured */
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* broadcast each column of B into the corresponding column of Be
       (MPI_REPLACE: plain copy, no reduction) */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2385 
2386 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2387 {
2388   Mat_Product      *product = C->product;
2389   PetscInt          alg;
2390   Mat_MatMultDense *ab;
2391   PetscBool         flg;
2392 
2393   PetscFunctionBegin;
2394   MatCheckProduct(C, 4);
2395   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2396   /* check local size of A and B */
2397   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2398              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2399 
2400   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2401   alg = flg ? 0 : 1;
2402 
2403   /* setup C */
2404   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2405   PetscCall(MatSetType(C, MATMPIDENSE));
2406   PetscCall(MatSetUp(C));
2407 
2408   /* create data structure for reuse Cdense */
2409   PetscCall(PetscNew(&ab));
2410 
2411   switch (alg) {
2412   case 1: /* alg: "elemental" */
2413 #if PetscDefined(HAVE_ELEMENTAL)
2414     /* create elemental matrices Ae and Be */
2415     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2416     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2417     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2418     PetscCall(MatSetUp(ab->Ae));
2419     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2420 
2421     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2422     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2423     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2424     PetscCall(MatSetUp(ab->Be));
2425     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2426 
2427     /* compute symbolic Ce = Ae*Be */
2428     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2429     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2430 #else
2431     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2432 #endif
2433     break;
2434   default: /* alg: "petsc" */
2435     ab->Ae = NULL;
2436     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2437     ab->Ce = NULL;
2438     break;
2439   }
2440 
2441   C->product->data       = ab;
2442   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2443   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2444   PetscFunctionReturn(PETSC_SUCCESS);
2445 }
2446 
2447 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2448 {
2449   Mat_Product *product     = C->product;
2450   const char  *algTypes[2] = {"petsc", "elemental"};
2451   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2452   PetscBool    flg = PETSC_FALSE;
2453 
2454   PetscFunctionBegin;
2455   /* Set default algorithm */
2456   alg = 0; /* default is petsc */
2457   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2458   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2459 
2460   /* Get runtime option */
2461   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2462   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2463   PetscOptionsEnd();
2464   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2465 
2466   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2467   C->ops->productsymbolic = MatProductSymbolic_AB;
2468   PetscFunctionReturn(PETSC_SUCCESS);
2469 }
2470 
2471 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2472 {
2473   Mat_Product *product = C->product;
2474   Mat          A = product->A, B = product->B;
2475 
2476   PetscFunctionBegin;
2477   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2478              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2479   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2480   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2481   PetscFunctionReturn(PETSC_SUCCESS);
2482 }
2483 
2484 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2485 {
2486   Mat_Product *product     = C->product;
2487   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2488   PetscInt     alg, nalg = 2;
2489   PetscBool    flg = PETSC_FALSE;
2490 
2491   PetscFunctionBegin;
2492   /* Set default algorithm */
2493   alg = 0; /* default is allgatherv */
2494   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2495   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2496 
2497   /* Get runtime option */
2498   if (product->api_user) {
2499     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2500     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2501     PetscOptionsEnd();
2502   } else {
2503     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2504     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2505     PetscOptionsEnd();
2506   }
2507   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2508 
2509   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2510   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2511   PetscFunctionReturn(PETSC_SUCCESS);
2512 }
2513 
2514 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2515 {
2516   Mat_Product *product = C->product;
2517 
2518   PetscFunctionBegin;
2519   switch (product->type) {
2520   case MATPRODUCT_AB:
2521     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2522     break;
2523   case MATPRODUCT_AtB:
2524     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2525     break;
2526   case MATPRODUCT_ABt:
2527     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2528     break;
2529   default:
2530     break;
2531   }
2532   PetscFunctionReturn(PETSC_SUCCESS);
2533 }
2534