xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 3e21cddb284bcc2817c8d6e539a88b2f2b2f7f3c)
1 /*
2    Basic functions for parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
13   matrix that represents the operator. For a sequential matrix it returns the matrix itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
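  Example Usage:
  A minimal sketch (assumes `A` is an already assembled `MATMPIDENSE` or `MATSEQDENSE` matrix):
.vb
  Mat      Aloc;
  PetscInt lda;

  PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
  PetscCall(MatDenseGetLDA(Aloc, &lda)); /* query the leading dimension of the local (sequential) storage */
.ve
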
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
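  /* add alpha only to the locally owned diagonal entries that fall inside the square part of the (possibly rectangular) matrix */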
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
95 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
96 {
97   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
98   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
99   PetscScalar  *array;
100   MPI_Comm      comm;
101   PetscBool     flg;
102   Mat           B;
103 
104   PetscFunctionBegin;
105   PetscCall(MatHasCongruentLayouts(A, &flg));
106   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
107   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
108   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
109 #if PetscDefined(HAVE_CUDA)
110     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
111     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
112 #elif PetscDefined(HAVE_HIP)
113     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
114     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
115 #endif
116     PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
117     PetscCall(MatCreate(comm, &B));
118     PetscCall(MatSetSizes(B, m, m, m, m));
119     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
120     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
121     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
122     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
123     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
124     *a = B;
125     PetscCall(MatDestroy(&B));
126   } else *a = B;
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
131 {
132   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
133   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
134   PetscBool     roworiented = A->roworiented;
135 
136   PetscFunctionBegin;
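  /* rows owned by this process are inserted directly into the local matrix; off-process rows are stashed (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and communicated during assembly */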
137   for (i = 0; i < m; i++) {
138     if (idxm[i] < 0) continue;
139     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
140     if (idxm[i] >= rstart && idxm[i] < rend) {
141       row = idxm[i] - rstart;
142       if (roworiented) {
143         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
144       } else {
145         for (j = 0; j < n; j++) {
146           if (idxn[j] < 0) continue;
147           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
148           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
149         }
150       }
151     } else if (!A->donotstash) {
152       mat->assembled = PETSC_FALSE;
153       if (roworiented) {
154         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
155       } else {
156         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
157       }
158     }
159   }
160   PetscFunctionReturn(PETSC_SUCCESS);
161 }
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
194 {
195   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
196   MatType       mtype = MATSEQDENSE;
197 
198   PetscFunctionBegin;
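  /* lazily create the inner sequential matrix if it does not exist yet, matching the host/device type of the MPI matrix */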
199   if (!a->A) {
200     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
201     PetscCall(PetscLayoutSetUp(A->rmap));
202     PetscCall(PetscLayoutSetUp(A->cmap));
203     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
204     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
205 #if PetscDefined(HAVE_CUDA)
206     PetscBool iscuda;
207     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
208     if (iscuda) mtype = MATSEQDENSECUDA;
209 #elif PetscDefined(HAVE_HIP)
210     PetscBool iship;
211     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
212     if (iship) mtype = MATSEQDENSEHIP;
213 #endif
214     PetscCall(MatSetType(a->A, mtype));
215   }
216   PetscCall(MatDenseSetLDA(a->A, lda));
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
283 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
284 {
285   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
286   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
287   const PetscInt    *irow, *icol;
288   const PetscScalar *v;
289   PetscScalar       *bv;
290   Mat                newmat;
291   IS                 iscol_local;
292   MPI_Comm           comm_is, comm_mat;
293 
294   PetscFunctionBegin;
295   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
296   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
297   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
298 
299   PetscCall(ISAllGather(iscol, &iscol_local));
300   PetscCall(ISGetIndices(isrow, &irow));
301   PetscCall(ISGetIndices(iscol_local, &icol));
302   PetscCall(ISGetLocalSize(isrow, &nrows));
303   PetscCall(ISGetLocalSize(iscol, &ncols));
304   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
305 
306   /* No parallel redistribution is currently supported! Each index set should really be checked
307      to confirm that it is OK. Currently only submatrices with the same partitioning as the
308      original matrix are supported! */
309 
310   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
311   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
312 
313   /* Check submatrix call */
314   if (scall == MAT_REUSE_MATRIX) {
315     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
316     /* Really need to test rows and column sizes! */
317     newmat = *B;
318   } else {
319     /* Create and fill new matrix */
320     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
321     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
322     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
323     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
324   }
325 
326   /* Now extract the data pointers and do the copy, column at a time */
327   newmatd = (Mat_MPIDense *)newmat->data;
328   PetscCall(MatDenseGetArray(newmatd->A, &bv));
329   PetscCall(MatDenseGetArrayRead(mat->A, &v));
330   PetscCall(MatDenseGetLDA(mat->A, &lda));
331   for (i = 0; i < Ncols; i++) {
332     const PetscScalar *av = v + lda * icol[i];
333     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
334   }
335   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
336   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
337 
338   /* Assemble the matrices so that the correct flags are set */
339   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
340   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
341 
342   /* Free work space */
343   PetscCall(ISRestoreIndices(isrow, &irow));
344   PetscCall(ISRestoreIndices(iscol_local, &icol));
345   PetscCall(ISDestroy(&iscol_local));
346   *B = newmat;
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
392 {
393   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
394   PetscInt      i, *row, *col, flg, j, rstart, ncols;
395   PetscMPIInt   n;
396   PetscScalar  *val;
397 
398   PetscFunctionBegin;
399   if (!mdn->donotstash && !mat->nooffprocentries) {
400     /*  wait on receives */
401     while (1) {
402       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
403       if (!flg) break;
404 
405       for (i = 0; i < n;) {
406         /* Now identify the consecutive vals belonging to the same row */
407         for (j = i, rstart = row[j]; j < n; j++) {
408           if (row[j] != rstart) break;
409         }
410         if (j < n) ncols = j - i;
411         else ncols = n - i;
412         /* Now assemble all these values with a single function call */
413         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
414         i = j;
415       }
416     }
417     PetscCall(MatStashScatterEnd_Private(&mat->stash));
418   }
419 
420   PetscCall(MatAssemblyBegin(mdn->A, mode));
421   PetscCall(MatAssemblyEnd(mdn->A, mode));
422   PetscFunctionReturn(PETSC_SUCCESS);
423 }
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
434 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
435 {
436   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
437   PetscInt      i, len, *lrows;
438 
439   PetscFunctionBegin;
440   /* get locally owned rows */
441   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
442   /* fix right-hand side if needed */
443   if (x && b) {
444     const PetscScalar *xx;
445     PetscScalar       *bb;
446 
447     PetscCall(VecGetArrayRead(x, &xx));
448     PetscCall(VecGetArrayWrite(b, &bb));
449     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
450     PetscCall(VecRestoreArrayRead(x, &xx));
451     PetscCall(VecRestoreArrayWrite(b, &bb));
452   }
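  /* zero the locally owned rows in the local matrix; if a nonzero diag was requested, set the matrix diagonal to that value */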
453   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
454   if (diag != 0.0) {
455     Vec d;
456 
457     PetscCall(MatCreateVecs(A, NULL, &d));
458     PetscCall(VecSet(d, diag));
459     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
460     PetscCall(VecDestroy(&d));
461   }
462   PetscCall(PetscFree(lrows));
463   PetscFunctionReturn(PETSC_SUCCESS);
464 }
465 
466 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
470 
471 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
472 {
473   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
474   const PetscScalar *ax;
475   PetscScalar       *ay;
476   PetscMemType       axmtype, aymtype;
477 
478   PetscFunctionBegin;
479   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
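  /* broadcast the needed entries of xx into the local work vector lvec via the PetscSF, then apply the local dense multiply */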
480   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
481   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
482   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
483   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
484   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
485   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
486   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
487   PetscFunctionReturn(PETSC_SUCCESS);
488 }
489 
490 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
491 {
492   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
493   const PetscScalar *ax;
494   PetscScalar       *ay;
495   PetscMemType       axmtype, aymtype;
496 
497   PetscFunctionBegin;
498   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
499   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
500   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
501   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
502   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
503   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
504   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
505   PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
510 {
511   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
512   const PetscScalar *ax;
513   PetscScalar       *ay;
514   PetscMemType       axmtype, aymtype;
515 
516   PetscFunctionBegin;
517   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
518   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
519   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
520   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
521   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
522   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
523   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
524   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
525   PetscFunctionReturn(PETSC_SUCCESS);
526 }
527 
528 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
529 {
530   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
531   const PetscScalar *ax;
532   PetscScalar       *ay;
533   PetscMemType       axmtype, aymtype;
534 
535   PetscFunctionBegin;
536   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
537   PetscCall(VecSet(yy, 0.0));
538   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
539   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
540   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
541   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
542   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
543   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
544   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
545   PetscFunctionReturn(PETSC_SUCCESS);
546 }
547 
548 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
549 {
550   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
551   const PetscScalar *ax;
552   PetscScalar       *ay;
553   PetscMemType       axmtype, aymtype;
554 
555   PetscFunctionBegin;
556   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
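  /* apply the local (Hermitian) transpose multiply into lvec, then sum the per-process partial results into yy via the PetscSF reduce */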
557   PetscCall(VecSet(yy, 0.0));
558   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
559   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
560   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
561   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
562   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
563   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
564   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
565   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
566   PetscFunctionReturn(PETSC_SUCCESS);
567 }
568 
569 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
570 {
571   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
572   const PetscScalar *ax;
573   PetscScalar       *ay;
574   PetscMemType       axmtype, aymtype;
575 
576   PetscFunctionBegin;
577   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
578   PetscCall(VecCopy(yy, zz));
581   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
582   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
583   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
584   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
585   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
586   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
587   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590 
591 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
592 {
593   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
594   const PetscScalar *ax;
595   PetscScalar       *ay;
596   PetscMemType       axmtype, aymtype;
597 
598   PetscFunctionBegin;
599   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
600   PetscCall(VecCopy(yy, zz));
601   if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
602   else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
603   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
604   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
605   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
606   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
607   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
608   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
609   PetscFunctionReturn(PETSC_SUCCESS);
610 }
611 
612 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
613 {
614   PetscFunctionBegin;
615   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
616   PetscFunctionReturn(PETSC_SUCCESS);
617 }
618 
619 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
620 {
621   PetscFunctionBegin;
622   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
623   PetscFunctionReturn(PETSC_SUCCESS);
624 }
625 
626 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
627 {
628   PetscFunctionBegin;
629   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
630   PetscFunctionReturn(PETSC_SUCCESS);
631 }
632 
633 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
634 {
635   PetscFunctionBegin;
636   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
639 
640 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
641 {
642   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
643   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
644   PetscScalar       *x;
645   const PetscScalar *av;
646 
647   PetscFunctionBegin;
648   PetscCall(VecGetArray(v, &x));
649   PetscCall(VecGetSize(v, &ng));
650   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
651   PetscCall(VecGetLocalSize(v, &nl));
652   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
653   radd = A->rmap->rstart * m;
654   PetscCall(MatDenseGetArrayRead(a->A, &av));
655   PetscCall(MatDenseGetLDA(a->A, &lda));
656   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
657   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
658   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
659   PetscCall(VecRestoreArray(v, &x));
660   PetscFunctionReturn(PETSC_SUCCESS);
661 }
662 
663 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
664 {
665   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
666 
667   PetscFunctionBegin;
668   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
669   PetscCall(MatStashDestroy_Private(&mat->stash));
670   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
671   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
672   PetscCall(MatDestroy(&mdn->A));
673   PetscCall(VecDestroy(&mdn->lvec));
674   PetscCall(PetscSFDestroy(&mdn->Mvctx));
675   PetscCall(VecDestroy(&mdn->cvec));
676   PetscCall(MatDestroy(&mdn->cmat));
677 
678   PetscCall(PetscFree(mat->data));
679   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
680 
681   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
694 #if defined(PETSC_HAVE_ELEMENTAL)
695   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
696 #endif
697 #if defined(PETSC_HAVE_SCALAPACK)
698   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
699 #endif
700   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
703 #if defined(PETSC_HAVE_CUDA)
704   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
722 #endif
723 #if defined(PETSC_HAVE_HIP)
724   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
742 #endif
743   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
756 
757   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
761 #include <petscdraw.h>
762 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
763 {
764   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
765   PetscMPIInt       rank;
766   PetscViewerType   vtype;
767   PetscBool         iascii, isdraw;
768   PetscViewer       sviewer;
769   PetscViewerFormat format;
770 
771   PetscFunctionBegin;
772   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
773   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
774   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
775   if (iascii) {
776     PetscCall(PetscViewerGetType(viewer, &vtype));
777     PetscCall(PetscViewerGetFormat(viewer, &format));
778     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
779       MatInfo info;
780       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
781       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
782       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
783                                                    (PetscInt)info.memory));
784       PetscCall(PetscViewerFlush(viewer));
785       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
786       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
787       PetscFunctionReturn(PETSC_SUCCESS);
788     } else if (format == PETSC_VIEWER_ASCII_INFO) {
789       PetscFunctionReturn(PETSC_SUCCESS);
790     }
791   } else if (isdraw) {
792     PetscDraw draw;
793     PetscBool isnull;
794 
795     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
796     PetscCall(PetscDrawIsNull(draw, &isnull));
797     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
798   }
799 
800   {
801     /* assemble the entire matrix onto the first processor. */
802     Mat          A;
803     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
804     PetscInt    *cols;
805     PetscScalar *vals;
806 
807     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
808     if (rank == 0) {
809       PetscCall(MatSetSizes(A, M, N, M, N));
810     } else {
811       PetscCall(MatSetSizes(A, 0, 0, M, N));
812     }
813     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
814     PetscCall(MatSetType(A, MATMPIDENSE));
815     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
816 
817     /* Copy the matrix. This isn't the most efficient means,
818        but it's quick for now */
819     A->insertmode = INSERT_VALUES;
820 
821     row = mat->rmap->rstart;
822     m   = mdn->A->rmap->n;
823     for (i = 0; i < m; i++) {
824       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
825       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
826       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
827       row++;
828     }
829 
830     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
831     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
832     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
833     if (rank == 0) {
834       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
835       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
836     }
837     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
838     PetscCall(MatDestroy(&A));
839   }
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
844 {
845   PetscBool iascii, isbinary, isdraw, issocket;
846 
847   PetscFunctionBegin;
848   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
852 
853   if (iascii || issocket || isdraw) {
854     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
855   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
856   PetscFunctionReturn(PETSC_SUCCESS);
857 }
858 
859 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
860 {
861   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
862   Mat            mdn = mat->A;
863   PetscLogDouble isend[5], irecv[5];
864 
865   PetscFunctionBegin;
866   info->block_size = 1.0;
867 
868   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
869 
870   isend[0] = info->nz_used;
871   isend[1] = info->nz_allocated;
872   isend[2] = info->nz_unneeded;
873   isend[3] = info->memory;
874   isend[4] = info->mallocs;
875   if (flag == MAT_LOCAL) {
876     info->nz_used      = isend[0];
877     info->nz_allocated = isend[1];
878     info->nz_unneeded  = isend[2];
879     info->memory       = isend[3];
880     info->mallocs      = isend[4];
881   } else if (flag == MAT_GLOBAL_MAX) {
882     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
883 
884     info->nz_used      = irecv[0];
885     info->nz_allocated = irecv[1];
886     info->nz_unneeded  = irecv[2];
887     info->memory       = irecv[3];
888     info->mallocs      = irecv[4];
889   } else if (flag == MAT_GLOBAL_SUM) {
890     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
891 
892     info->nz_used      = irecv[0];
893     info->nz_allocated = irecv[1];
894     info->nz_unneeded  = irecv[2];
895     info->memory       = irecv[3];
896     info->mallocs      = irecv[4];
897   }
898   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
899   info->fill_ratio_needed = 0;
900   info->factor_mallocs    = 0;
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
904 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
905 {
906   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
907 
908   PetscFunctionBegin;
909   switch (op) {
910   case MAT_NEW_NONZERO_LOCATIONS:
911   case MAT_NEW_NONZERO_LOCATION_ERR:
912   case MAT_NEW_NONZERO_ALLOCATION_ERR:
913     MatCheckPreallocated(A, 1);
914     PetscCall(MatSetOption(a->A, op, flg));
915     break;
916   case MAT_ROW_ORIENTED:
917     MatCheckPreallocated(A, 1);
918     a->roworiented = flg;
919     PetscCall(MatSetOption(a->A, op, flg));
920     break;
921   case MAT_FORCE_DIAGONAL_ENTRIES:
922   case MAT_KEEP_NONZERO_PATTERN:
923   case MAT_USE_HASH_TABLE:
924   case MAT_SORTED_FULL:
925     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
926     break;
927   case MAT_IGNORE_OFF_PROC_ENTRIES:
928     a->donotstash = flg;
929     break;
930   case MAT_SYMMETRIC:
931   case MAT_STRUCTURALLY_SYMMETRIC:
932   case MAT_HERMITIAN:
933   case MAT_SYMMETRY_ETERNAL:
934   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
935   case MAT_SPD:
936   case MAT_IGNORE_LOWER_TRIANGULAR:
937   case MAT_IGNORE_ZERO_ENTRIES:
938   case MAT_SPD_ETERNAL:
939     /* if the diagonal matrix is square it inherits some of the properties above */
940     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
941     break;
942   default:
943     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
944   }
945   PetscFunctionReturn(PETSC_SUCCESS);
946 }
947 
948 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
949 {
950   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
951   const PetscScalar *l;
952   PetscScalar        x, *v, *vv, *r;
953   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
954 
955   PetscFunctionBegin;
956   PetscCall(MatDenseGetArray(mdn->A, &vv));
957   PetscCall(MatDenseGetLDA(mdn->A, &lda));
958   PetscCall(MatGetLocalSize(A, &s2, &s3));
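  /* left scaling multiplies each locally owned row by the matching entry of ll; right scaling first broadcasts rr into the local work vector lvec and then scales each column */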
959   if (ll) {
960     PetscCall(VecGetLocalSize(ll, &s2a));
961     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
962     PetscCall(VecGetArrayRead(ll, &l));
963     for (i = 0; i < m; i++) {
964       x = l[i];
965       v = vv + i;
966       for (j = 0; j < n; j++) {
967         (*v) *= x;
968         v += lda;
969       }
970     }
971     PetscCall(VecRestoreArrayRead(ll, &l));
972     PetscCall(PetscLogFlops(1.0 * n * m));
973   }
974   if (rr) {
975     const PetscScalar *ar;
976 
977     PetscCall(VecGetLocalSize(rr, &s3a));
978     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s3a, s3);
979     PetscCall(VecGetArrayRead(rr, &ar));
980     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
981     PetscCall(VecGetArray(mdn->lvec, &r));
982     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
983     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
984     PetscCall(VecRestoreArrayRead(rr, &ar));
985     for (i = 0; i < n; i++) {
986       x = r[i];
987       v = vv + i * lda;
988       for (j = 0; j < m; j++) (*v++) *= x;
989     }
990     PetscCall(VecRestoreArray(mdn->lvec, &r));
991     PetscCall(PetscLogFlops(1.0 * n * m));
992   }
993   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
994   PetscFunctionReturn(PETSC_SUCCESS);
995 }
996 
997 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
998 {
999   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1000   PetscInt           i, j;
1001   PetscMPIInt        size;
1002   PetscReal          sum = 0.0;
1003   const PetscScalar *av, *v;
1004 
1005   PetscFunctionBegin;
1006   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1007   v = av;
1008   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1009   if (size == 1) {
1010     PetscCall(MatNorm(mdn->A, type, nrm));
1011   } else {
1012     if (type == NORM_FROBENIUS) {
1013       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1014         sum += PetscRealPart(PetscConj(*v) * (*v));
1015         v++;
1016       }
1017       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1018       *nrm = PetscSqrtReal(*nrm);
1019       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1020     } else if (type == NORM_1) {
1021       PetscReal *tmp, *tmp2;
1022       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
1023       *nrm = 0.0;
1024       v    = av;
1025       for (j = 0; j < mdn->A->cmap->n; j++) {
1026         for (i = 0; i < mdn->A->rmap->n; i++) {
1027           tmp[j] += PetscAbsScalar(*v);
1028           v++;
1029         }
1030       }
1031       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1032       for (j = 0; j < A->cmap->N; j++) {
1033         if (tmp2[j] > *nrm) *nrm = tmp2[j];
1034       }
1035       PetscCall(PetscFree2(tmp, tmp2));
1036       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1037     } else if (type == NORM_INFINITY) { /* max row norm */
1038       PetscReal ntemp;
1039       PetscCall(MatNorm(mdn->A, type, &ntemp));
1040       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1041     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1042   }
1043   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1044   PetscFunctionReturn(PETSC_SUCCESS);
1045 }
1046 
1047 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1048 {
1049   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1050   Mat           B;
1051   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1052   PetscInt      j, i, lda;
1053   PetscScalar  *v;
1054 
1055   PetscFunctionBegin;
1056   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1057   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1058     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1059     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1060     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1061     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1062   } else B = *matout;
1063 
1064   m = a->A->rmap->n;
1065   n = a->A->cmap->n;
1066   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
1067   PetscCall(MatDenseGetLDA(a->A, &lda));
1068   PetscCall(PetscMalloc1(m, &rwork));
1069   for (i = 0; i < m; i++) rwork[i] = rstart + i;
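  /* column j of the local block holds A(rstart:rstart+m-1, j); insert it as row j of the transpose with global column indices rwork */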
1070   for (j = 0; j < n; j++) {
1071     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
1072     v = PetscSafePointerPlusOffset(v, lda);
1073   }
1074   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
1075   PetscCall(PetscFree(rwork));
1076   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1077   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1078   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1079     *matout = B;
1080   } else {
1081     PetscCall(MatHeaderMerge(A, &B));
1082   }
1083   PetscFunctionReturn(PETSC_SUCCESS);
1084 }
1085 
1086 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1087 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1088 
1089 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1090 {
1091   PetscFunctionBegin;
1092   PetscCall(PetscLayoutSetUp(A->rmap));
1093   PetscCall(PetscLayoutSetUp(A->cmap));
1094   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1095   PetscFunctionReturn(PETSC_SUCCESS);
1096 }
1097 
1098 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1099 {
1100   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1101 
1102   PetscFunctionBegin;
1103   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1108 {
1109   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1110 
1111   PetscFunctionBegin;
1112   PetscCall(MatConjugate(a->A));
1113   PetscFunctionReturn(PETSC_SUCCESS);
1114 }
1115 
1116 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1117 {
1118   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1119 
1120   PetscFunctionBegin;
1121   PetscCall(MatRealPart(a->A));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1126 {
1127   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1128 
1129   PetscFunctionBegin;
1130   PetscCall(MatImaginaryPart(a->A));
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1135 {
1136   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1137 
1138   PetscFunctionBegin;
1139   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1140   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1141   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1146 
1147 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1148 {
1149   PetscInt      i, m, n;
1150   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1151   PetscReal    *work;
1152 
1153   PetscFunctionBegin;
1154   PetscCall(MatGetSize(A, &m, &n));
1155   PetscCall(PetscMalloc1(n, &work));
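  /* compute local per-column reductions, combine them across processes (MAX for the infinity norm, SUM otherwise), then finish NORM_2 with a square root and the means with a division by the global row count */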
1156   if (type == REDUCTION_MEAN_REALPART) {
1157     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1158   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1159     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1160   } else {
1161     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1162   }
1163   if (type == NORM_2) {
1164     for (i = 0; i < n; i++) work[i] *= work[i];
1165   }
1166   if (type == NORM_INFINITY) {
1167     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1168   } else {
1169     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1170   }
1171   PetscCall(PetscFree(work));
1172   if (type == NORM_2) {
1173     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1174   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1175     for (i = 0; i < n; i++) reductions[i] /= m;
1176   }
1177   PetscFunctionReturn(PETSC_SUCCESS);
1178 }
1179 
1180 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1181 {
1182   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1183 
1184   PetscFunctionBegin;
1185   PetscCall(MatSetRandom(d->A, rctx));
1186 #if defined(PETSC_HAVE_DEVICE)
1187   x->offloadmask = d->A->offloadmask;
1188 #endif
1189   PetscFunctionReturn(PETSC_SUCCESS);
1190 }
1191 
1192 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1193 {
1194   PetscFunctionBegin;
1195   *missing = PETSC_FALSE;
1196   PetscFunctionReturn(PETSC_SUCCESS);
1197 }
1198 
1199 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1200 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1201 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1202 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1203 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1204 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1205 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1206 
1207 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1208                                        MatGetRow_MPIDense,
1209                                        MatRestoreRow_MPIDense,
1210                                        MatMult_MPIDense,
1211                                        /*  4*/ MatMultAdd_MPIDense,
1212                                        MatMultTranspose_MPIDense,
1213                                        MatMultTransposeAdd_MPIDense,
1214                                        NULL,
1215                                        NULL,
1216                                        NULL,
1217                                        /* 10*/ NULL,
1218                                        NULL,
1219                                        NULL,
1220                                        NULL,
1221                                        MatTranspose_MPIDense,
1222                                        /* 15*/ MatGetInfo_MPIDense,
1223                                        MatEqual_MPIDense,
1224                                        MatGetDiagonal_MPIDense,
1225                                        MatDiagonalScale_MPIDense,
1226                                        MatNorm_MPIDense,
1227                                        /* 20*/ MatAssemblyBegin_MPIDense,
1228                                        MatAssemblyEnd_MPIDense,
1229                                        MatSetOption_MPIDense,
1230                                        MatZeroEntries_MPIDense,
1231                                        /* 24*/ MatZeroRows_MPIDense,
1232                                        NULL,
1233                                        NULL,
1234                                        NULL,
1235                                        NULL,
1236                                        /* 29*/ MatSetUp_MPIDense,
1237                                        NULL,
1238                                        NULL,
1239                                        MatGetDiagonalBlock_MPIDense,
1240                                        NULL,
1241                                        /* 34*/ MatDuplicate_MPIDense,
1242                                        NULL,
1243                                        NULL,
1244                                        NULL,
1245                                        NULL,
1246                                        /* 39*/ MatAXPY_MPIDense,
1247                                        MatCreateSubMatrices_MPIDense,
1248                                        NULL,
1249                                        MatGetValues_MPIDense,
1250                                        MatCopy_MPIDense,
1251                                        /* 44*/ NULL,
1252                                        MatScale_MPIDense,
1253                                        MatShift_MPIDense,
1254                                        NULL,
1255                                        NULL,
1256                                        /* 49*/ MatSetRandom_MPIDense,
1257                                        NULL,
1258                                        NULL,
1259                                        NULL,
1260                                        NULL,
1261                                        /* 54*/ NULL,
1262                                        NULL,
1263                                        NULL,
1264                                        NULL,
1265                                        NULL,
1266                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1267                                        MatDestroy_MPIDense,
1268                                        MatView_MPIDense,
1269                                        NULL,
1270                                        NULL,
1271                                        /* 64*/ NULL,
1272                                        NULL,
1273                                        NULL,
1274                                        NULL,
1275                                        NULL,
1276                                        /* 69*/ NULL,
1277                                        NULL,
1278                                        NULL,
1279                                        NULL,
1280                                        NULL,
1281                                        /* 74*/ NULL,
1282                                        NULL,
1283                                        NULL,
1284                                        NULL,
1285                                        NULL,
1286                                        /* 79*/ NULL,
1287                                        NULL,
1288                                        NULL,
1289                                        NULL,
1290                                        /* 83*/ MatLoad_MPIDense,
1291                                        NULL,
1292                                        NULL,
1293                                        NULL,
1294                                        NULL,
1295                                        NULL,
1296                                        /* 89*/ NULL,
1297                                        NULL,
1298                                        NULL,
1299                                        NULL,
1300                                        NULL,
1301                                        /* 94*/ NULL,
1302                                        NULL,
1303                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1304                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1305                                        NULL,
1306                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1307                                        NULL,
1308                                        NULL,
1309                                        MatConjugate_MPIDense,
1310                                        NULL,
1311                                        /*104*/ NULL,
1312                                        MatRealPart_MPIDense,
1313                                        MatImaginaryPart_MPIDense,
1314                                        NULL,
1315                                        NULL,
1316                                        /*109*/ NULL,
1317                                        NULL,
1318                                        NULL,
1319                                        MatGetColumnVector_MPIDense,
1320                                        MatMissingDiagonal_MPIDense,
1321                                        /*114*/ NULL,
1322                                        NULL,
1323                                        NULL,
1324                                        NULL,
1325                                        NULL,
1326                                        /*119*/ NULL,
1327                                        NULL,
1328                                        MatMultHermitianTranspose_MPIDense,
1329                                        MatMultHermitianTransposeAdd_MPIDense,
1330                                        NULL,
1331                                        /*124*/ NULL,
1332                                        MatGetColumnReductions_MPIDense,
1333                                        NULL,
1334                                        NULL,
1335                                        NULL,
1336                                        /*129*/ NULL,
1337                                        NULL,
1338                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1339                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1340                                        NULL,
1341                                        /*134*/ NULL,
1342                                        NULL,
1343                                        NULL,
1344                                        NULL,
1345                                        NULL,
1346                                        /*139*/ NULL,
1347                                        NULL,
1348                                        NULL,
1349                                        NULL,
1350                                        NULL,
1351                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1352                                        /*145*/ NULL,
1353                                        NULL,
1354                                        NULL,
1355                                        NULL,
1356                                        NULL,
1357                                        /*150*/ NULL,
1358                                        NULL,
1359                                        NULL};
1360 
1361 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1362 {
1363   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1364   MatType       mtype = MATSEQDENSE;
1365 
1366   PetscFunctionBegin;
1367   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1368   PetscCall(PetscLayoutSetUp(mat->rmap));
1369   PetscCall(PetscLayoutSetUp(mat->cmap));
1370   if (!a->A) {
1371     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1372     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1373   }
1374 #if defined(PETSC_HAVE_CUDA)
1375   PetscBool iscuda;
1376   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1377   if (iscuda) mtype = MATSEQDENSECUDA;
1378 #endif
1379 #if defined(PETSC_HAVE_HIP)
1380   PetscBool iship;
1381   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1382   if (iship) mtype = MATSEQDENSEHIP;
1383 #endif
1384   PetscCall(MatSetType(a->A, mtype));
1385   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1386 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1387   mat->offloadmask = a->A->offloadmask;
1388 #endif
1389   mat->preallocated = PETSC_TRUE;
1390   mat->assembled    = PETSC_TRUE;
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
1394 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1395 {
1396   Mat B, C;
1397 
1398   PetscFunctionBegin;
1399   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1400   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1401   PetscCall(MatDestroy(&C));
1402   if (reuse == MAT_REUSE_MATRIX) {
1403     C = *newmat;
1404   } else C = NULL;
1405   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1406   PetscCall(MatDestroy(&B));
1407   if (reuse == MAT_INPLACE_MATRIX) {
1408     PetscCall(MatHeaderReplace(A, &C));
1409   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1410   PetscFunctionReturn(PETSC_SUCCESS);
1411 }
1412 
1413 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1414 {
1415   Mat B, C;
1416 
1417   PetscFunctionBegin;
1418   PetscCall(MatDenseGetLocalMatrix(A, &C));
1419   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1420   if (reuse == MAT_REUSE_MATRIX) {
1421     C = *newmat;
1422   } else C = NULL;
1423   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1424   PetscCall(MatDestroy(&B));
1425   if (reuse == MAT_INPLACE_MATRIX) {
1426     PetscCall(MatHeaderReplace(A, &C));
1427   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1428   PetscFunctionReturn(PETSC_SUCCESS);
1429 }
1430 
1431 #if defined(PETSC_HAVE_ELEMENTAL)
1432 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1433 {
1434   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1435   Mat           mat_elemental;
1436   PetscScalar  *v;
1437   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1438 
1439   PetscFunctionBegin;
1440   if (reuse == MAT_REUSE_MATRIX) {
1441     mat_elemental = *newmat;
1442     PetscCall(MatZeroEntries(*newmat));
1443   } else {
1444     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1445     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1446     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1447     PetscCall(MatSetUp(mat_elemental));
1448     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1449   }
1450 
1451   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1452   for (i = 0; i < N; i++) cols[i] = i;
1453   for (i = 0; i < m; i++) rows[i] = rstart + i;
1454 
1455   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1456   PetscCall(MatDenseGetArray(A, &v));
1457   PetscCall(MatDenseGetLDA(a->A, &lda));
1458   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1459   else {
1460     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1461   }
1462   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1463   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1464   PetscCall(MatDenseRestoreArray(A, &v));
1465   PetscCall(PetscFree2(rows, cols));
1466 
1467   if (reuse == MAT_INPLACE_MATRIX) {
1468     PetscCall(MatHeaderReplace(A, &mat_elemental));
1469   } else {
1470     *newmat = mat_elemental;
1471   }
1472   PetscFunctionReturn(PETSC_SUCCESS);
1473 }
1474 #endif
1475 
1476 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1477 {
1478   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1479 
1480   PetscFunctionBegin;
1481   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1482   PetscFunctionReturn(PETSC_SUCCESS);
1483 }
1484 
1485 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1486 {
1487   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1488 
1489   PetscFunctionBegin;
1490   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1491   PetscFunctionReturn(PETSC_SUCCESS);
1492 }
1493 
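/* Stacks the per-rank sequential dense matrices 'inmat' on top of each other (by rows) to form the
   distributed dense matrix 'outmat'; 'n' is the number of local columns of the result (or PETSC_DECIDE) */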
1494 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1495 {
1496   Mat_MPIDense *mat;
1497   PetscInt      m, nloc, N;
1498 
1499   PetscFunctionBegin;
1500   PetscCall(MatGetSize(inmat, &m, &N));
1501   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1502   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1503     PetscInt sum;
1504 
1505     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1506     /* Check sum(n) = N */
1507     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1508     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1509 
1510     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1511     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1512   }
1513 
1514   /* numeric phase */
1515   mat = (Mat_MPIDense *)(*outmat)->data;
1516   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1517   PetscFunctionReturn(PETSC_SUCCESS);
1518 }
1519 
1520 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1521 {
1522   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1523   PetscInt      lda;
1524 
1525   PetscFunctionBegin;
1526   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1527   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1528   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1529   a->vecinuse = col + 1;
1530   PetscCall(MatDenseGetLDA(a->A, &lda));
1531   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1532   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1533   *v = a->cvec;
1534   PetscFunctionReturn(PETSC_SUCCESS);
1535 }
1536 
1537 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1538 {
1539   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1540 
1541   PetscFunctionBegin;
1542   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1543   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1544   a->vecinuse = 0;
1545   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1546   PetscCall(VecResetArray(a->cvec));
1547   if (v) *v = NULL;
1548   PetscFunctionReturn(PETSC_SUCCESS);
1549 }
1550 
1551 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1552 {
1553   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1554   PetscInt      lda;
1555 
1556   PetscFunctionBegin;
1557   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1558   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1559   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1560   a->vecinuse = col + 1;
1561   PetscCall(MatDenseGetLDA(a->A, &lda));
1562   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1563   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1564   PetscCall(VecLockReadPush(a->cvec));
1565   *v = a->cvec;
1566   PetscFunctionReturn(PETSC_SUCCESS);
1567 }
1568 
1569 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1570 {
1571   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1572 
1573   PetscFunctionBegin;
1574   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1575   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1576   a->vecinuse = 0;
1577   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1578   PetscCall(VecLockReadPop(a->cvec));
1579   PetscCall(VecResetArray(a->cvec));
1580   if (v) *v = NULL;
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
1584 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1585 {
1586   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1587   PetscInt      lda;
1588 
1589   PetscFunctionBegin;
1590   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1591   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1592   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1593   a->vecinuse = col + 1;
1594   PetscCall(MatDenseGetLDA(a->A, &lda));
1595   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1596   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1597   *v = a->cvec;
1598   PetscFunctionReturn(PETSC_SUCCESS);
1599 }
1600 
1601 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1602 {
1603   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1604 
1605   PetscFunctionBegin;
1606   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1607   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1608   a->vecinuse = 0;
1609   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1610   PetscCall(VecResetArray(a->cvec));
1611   if (v) *v = NULL;
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1616 {
1617   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1618   Mat_MPIDense *c;
1619   MPI_Comm      comm;
1620   PetscInt      pbegin, pend;
1621 
1622   PetscFunctionBegin;
1623   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1624   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1625   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
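  /* intersect the requested global row range [rbegin, rend) with the rows owned by this process */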
1626   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1627   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1628   if (!a->cmat) {
1629     PetscCall(MatCreate(comm, &a->cmat));
1630     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1631     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1632     else {
1633       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1634       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1635       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1636     }
1637     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1638     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1639   } else {
1640     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1641     if (same && a->cmat->rmap->N != A->rmap->N) {
1642       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1643       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1644     }
1645     if (!same) {
1646       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1647       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1648       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1649       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1650       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1651     }
1652     if (cend - cbegin != a->cmat->cmap->N) {
1653       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1654       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1655       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1656       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1657     }
1658   }
1659   c = (Mat_MPIDense *)a->cmat->data;
1660   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1661   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1662 
1663   a->cmat->preallocated = PETSC_TRUE;
1664   a->cmat->assembled    = PETSC_TRUE;
1665 #if defined(PETSC_HAVE_DEVICE)
1666   a->cmat->offloadmask = c->A->offloadmask;
1667 #endif
1668   a->matinuse = cbegin + 1;
1669   *v          = a->cmat;
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1674 {
1675   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1676   Mat_MPIDense *c;
1677 
1678   PetscFunctionBegin;
1679   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1680   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1681   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1682   a->matinuse = 0;
1683   c           = (Mat_MPIDense *)a->cmat->data;
1684   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1685   if (v) *v = NULL;
1686 #if defined(PETSC_HAVE_DEVICE)
1687   A->offloadmask = a->A->offloadmask;
1688 #endif
1689   PetscFunctionReturn(PETSC_SUCCESS);
1690 }
1691 
1692 /*MC
1693    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1694 
1695    Options Database Key:
1696 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1697 
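  Example Usage:
  A minimal sketch of creating a `MATMPIDENSE` matrix programmatically; `M` and `N` stand for global sizes chosen by the caller.
.vb
  Mat A;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
  PetscCall(MatSetType(A, MATMPIDENSE));
  PetscCall(MatSetUp(A));
.ve
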
1698   Level: beginner
1699 
1700 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1701 M*/
1702 PetscErrorCode MatCreate_MPIDense(Mat mat)
1703 {
1704   Mat_MPIDense *a;
1705 
1706   PetscFunctionBegin;
1707   PetscCall(PetscNew(&a));
1708   mat->data   = (void *)a;
1709   mat->ops[0] = MatOps_Values;
1710 
1711   mat->insertmode = NOT_SET_VALUES;
1712 
1713   /* build stash used to cache off-process entries set with MatSetValues() */
1714   a->donotstash = PETSC_FALSE;
1715 
1716   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1717 
1718   /* stuff used for matrix vector multiply */
1719   a->lvec        = NULL;
1720   a->Mvctx       = NULL;
1721   a->roworiented = PETSC_TRUE;
1722 
1723   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1724   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1742   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1743   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1744 #if defined(PETSC_HAVE_ELEMENTAL)
1745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1746 #endif
1747 #if defined(PETSC_HAVE_SCALAPACK)
1748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1749 #endif
1750 #if defined(PETSC_HAVE_CUDA)
1751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1752 #endif
1753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1756 #if defined(PETSC_HAVE_CUDA)
1757   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1758   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1759 #endif
1760 #if defined(PETSC_HAVE_HIP)
1761   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1762   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1763   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1764 #endif
1765   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1766   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1767   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1768   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1769   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
1770   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1771   PetscFunctionReturn(PETSC_SUCCESS);
1772 }
1773 
1774 /*MC
1775    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1776 
1777    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1778    and `MATMPIDENSE` otherwise.
1779 
1780    Options Database Key:
1781 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1782 
1783   Level: beginner
1784 
1785 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1786 M*/
1787 
1788 /*@C
1789   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1790 
1791   Collective
1792 
1793   Input Parameters:
1794 + B    - the matrix
1795 - data - optional location of matrix data.  Set to `NULL` for PETSc
1796          to control all matrix memory allocation.
1797 
1798   Level: intermediate
1799 
1800   Notes:
1801   The dense format is fully compatible with standard Fortran
1802   storage by columns.
1803 
1804   The data input variable is intended primarily for Fortran programmers
1805   who wish to allocate their own matrix memory space.  Most users should
1806   set `data` to `NULL`.
1807 
1808 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
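  Example Usage:
  A minimal sketch with PETSc-managed storage; `m`, `n`, `M`, and `N` stand for the local and global sizes chosen by the caller.
.vb
  Mat B;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, m, n, M, N));
  PetscCall(MatSetType(B, MATMPIDENSE));
  PetscCall(MatMPIDenseSetPreallocation(B, NULL));
.ve
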
1809 @*/
1810 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1811 {
1812   PetscFunctionBegin;
1813   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1814   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1815   PetscFunctionReturn(PETSC_SUCCESS);
1816 }
1817 
1818 /*@
1819   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1820   array provided by the user. This is useful to avoid copying an array
1821   into a matrix.
1822 
1823   Not Collective
1824 
1825   Input Parameters:
1826 + mat   - the matrix
1827 - array - the array in column major order
1828 
1829   Level: developer
1830 
1831   Note:
1832   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1833   freed when the matrix is destroyed.
1834 
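  Example Usage:
  A sketch of the place/reset pattern; `myarray` is a user-provided column-major array with room for the local block.
.vb
  PetscCall(MatDensePlaceArray(mat, myarray));
  // ... use mat; it now reads and writes myarray ...
  PetscCall(MatDenseResetArray(mat));
  // myarray is still owned by the caller and must be freed by the caller
.ve
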
1835 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1836           `MatDenseReplaceArray()`
1837 @*/
1838 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1839 {
1840   PetscFunctionBegin;
1841   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1842   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1843   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1844 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1845   mat->offloadmask = PETSC_OFFLOAD_CPU;
1846 #endif
1847   PetscFunctionReturn(PETSC_SUCCESS);
1848 }
1849 
1850 /*@
1851   MatDenseResetArray - Resets the matrix array to the array that it held before the call to `MatDensePlaceArray()`
1852 
1853   Not Collective
1854 
1855   Input Parameter:
1856 . mat - the matrix
1857 
1858   Level: developer
1859 
1860   Note:
1861   You can only call this after a call to `MatDensePlaceArray()`
1862 
1863 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1864 @*/
1865 PetscErrorCode MatDenseResetArray(Mat mat)
1866 {
1867   PetscFunctionBegin;
1868   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1869   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1870   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1871   PetscFunctionReturn(PETSC_SUCCESS);
1872 }
1873 
1874 /*@
1875   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1876   array provided by the user. This is useful to avoid copying an array
1877   into a matrix.
1878 
1879   Not Collective
1880 
1881   Input Parameters:
1882 + mat   - the matrix
1883 - array - the array in column major order
1884 
1885   Level: developer
1886 
1887   Note:
1888   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1889   freed by the user. It will be freed when the matrix is destroyed.
1890 
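  Example Usage:
  A sketch; `m` and `N` stand for the number of local rows and global columns of `mat`.
.vb
  PetscScalar *newarray;
  PetscCall(PetscMalloc1(m * N, &newarray));      // must be obtained with PetscMalloc()
  PetscCall(MatDenseReplaceArray(mat, newarray)); // mat now owns newarray and will free it
.ve
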
1891 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1892 @*/
1893 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1894 {
1895   PetscFunctionBegin;
1896   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1897   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1898   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1899 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1900   mat->offloadmask = PETSC_OFFLOAD_CPU;
1901 #endif
1902   PetscFunctionReturn(PETSC_SUCCESS);
1903 }
1904 
1905 /*@C
1906   MatCreateDense - Creates a matrix in `MATDENSE` format.
1907 
1908   Collective
1909 
1910   Input Parameters:
1911 + comm - MPI communicator
1912 . m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1913 . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1914 . M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1915 . N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1916 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1917    to control all matrix memory allocation.
1918 
1919   Output Parameter:
1920 . A - the matrix
1921 
1922   Level: intermediate
1923 
1924   Notes:
1925   The dense format is fully compatible with standard Fortran
1926   storage by columns.
1927 
1928   Although local portions of the matrix are stored in column-major
1929   order, the matrix is partitioned across MPI ranks by row.
1930 
1931   The data input variable is intended primarily for Fortran programmers
1932   who wish to allocate their own matrix memory space.  Most users should
1933   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1934 
1935   The user MUST specify either the local or global matrix dimensions
1936   (possibly both).
1937 
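  Example Usage:
  A minimal sketch creating a 100 x 20 distributed dense matrix with PETSc-managed storage.
.vb
  Mat A;
  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 20, NULL, &A));
.ve
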
1938 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1939 @*/
1940 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1941 {
1942   PetscFunctionBegin;
1943   PetscCall(MatCreate(comm, A));
1944   PetscCall(MatSetSizes(*A, m, n, M, N));
1945   PetscCall(MatSetType(*A, MATDENSE));
1946   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1947   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1948   PetscFunctionReturn(PETSC_SUCCESS);
1949 }
1950 
1951 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1952 {
1953   Mat           mat;
1954   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1955 
1956   PetscFunctionBegin;
1957   *newmat = NULL;
1958   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1959   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1960   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1961   a = (Mat_MPIDense *)mat->data;
1962 
1963   mat->factortype   = A->factortype;
1964   mat->assembled    = PETSC_TRUE;
1965   mat->preallocated = PETSC_TRUE;
1966 
1967   mat->insertmode = NOT_SET_VALUES;
1968   a->donotstash   = oldmat->donotstash;
1969 
1970   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1971   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1972 
1973   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1974 
1975   *newmat = mat;
1976   PetscFunctionReturn(PETSC_SUCCESS);
1977 }
1978 
1979 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1980 {
1981   PetscBool isbinary;
1982 #if defined(PETSC_HAVE_HDF5)
1983   PetscBool ishdf5;
1984 #endif
1985 
1986   PetscFunctionBegin;
1987   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1988   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1989   /* force binary viewer to load .info file if it has not yet done so */
1990   PetscCall(PetscViewerSetUp(viewer));
1991   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1992 #if defined(PETSC_HAVE_HDF5)
1993   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1994 #endif
1995   if (isbinary) {
1996     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1997 #if defined(PETSC_HAVE_HDF5)
1998   } else if (ishdf5) {
1999     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2000 #endif
2001   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2006 {
2007   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2008   Mat           a, b;
2009 
2010   PetscFunctionBegin;
2011   a = matA->A;
2012   b = matB->A;
2013   PetscCall(MatEqual(a, b, flag));
2014   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2015   PetscFunctionReturn(PETSC_SUCCESS);
2016 }
2017 
2018 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2019 {
2020   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2021 
2022   PetscFunctionBegin;
2023   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2024   PetscCall(MatDestroy(&atb->atb));
2025   PetscCall(PetscFree(atb));
2026   PetscFunctionReturn(PETSC_SUCCESS);
2027 }
2028 
2029 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2030 {
2031   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2032 
2033   PetscFunctionBegin;
2034   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2035   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2036   PetscCall(PetscFree(abt));
2037   PetscFunctionReturn(PETSC_SUCCESS);
2038 }
2039 
2040 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2041 {
2042   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2043   Mat_TransMatMultDense *atb;
2044   MPI_Comm               comm;
2045   PetscMPIInt            size, *recvcounts;
2046   PetscScalar           *carray, *sendbuf;
2047   const PetscScalar     *atbarray;
2048   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2049   const PetscInt        *ranges;
2050 
2051   PetscFunctionBegin;
2052   MatCheckProduct(C, 3);
2053   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2054   atb        = (Mat_TransMatMultDense *)C->product->data;
2055   recvcounts = atb->recvcounts;
2056   sendbuf    = atb->sendbuf;
2057 
2058   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2059   PetscCallMPI(MPI_Comm_size(comm, &size));
2060 
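  /* Algorithm: every rank forms the full (C->rmap->N) x (C->cmap->N) product aseq^T * bseq from its local
     rows of A and B; MPI_Reduce_scatter() then sums these partial products and leaves each rank with its
     own row block of C */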
2061   /* compute atbarray = aseq^T * bseq */
2062   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
2063 
2064   PetscCall(MatGetOwnershipRanges(C, &ranges));
2065 
2066   /* arrange atbarray into sendbuf */
2067   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
2068   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2069   for (proc = 0, k = 0; proc < size; proc++) {
2070     for (j = 0; j < cN; j++) {
2071       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2072     }
2073   }
2074   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2075 
2076   /* sum all atbarray to local values of C */
2077   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
2078   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
2079   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
2080   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2081   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2082   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2083   PetscFunctionReturn(PETSC_SUCCESS);
2084 }
2085 
2086 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2087 {
2088   MPI_Comm               comm;
2089   PetscMPIInt            size;
2090   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2091   Mat_TransMatMultDense *atb;
2092   PetscBool              cisdense = PETSC_FALSE;
2093   PetscInt               i;
2094   const PetscInt        *ranges;
2095 
2096   PetscFunctionBegin;
2097   MatCheckProduct(C, 4);
2098   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2099   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2100   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2101     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2102   }
2103 
2104   /* create matrix product C */
2105   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2106 #if defined(PETSC_HAVE_CUDA)
2107   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2108 #endif
2109 #if defined(PETSC_HAVE_HIP)
2110   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2111 #endif
2112   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2113   PetscCall(MatSetUp(C));
2114 
2115   /* create data structure for reuse C */
2116   PetscCallMPI(MPI_Comm_size(comm, &size));
2117   PetscCall(PetscNew(&atb));
2118   cM = C->rmap->N;
2119   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2120   PetscCall(MatGetOwnershipRanges(C, &ranges));
2121   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2122 
2123   C->product->data    = atb;
2124   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2125   PetscFunctionReturn(PETSC_SUCCESS);
2126 }
2127 
2128 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2129 {
2130   MPI_Comm               comm;
2131   PetscMPIInt            i, size;
2132   PetscInt               maxRows, bufsiz;
2133   PetscMPIInt            tag;
2134   PetscInt               alg;
2135   Mat_MatTransMultDense *abt;
2136   Mat_Product           *product = C->product;
2137   PetscBool              flg;
2138 
2139   PetscFunctionBegin;
2140   MatCheckProduct(C, 4);
2141   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2142   /* check local size of A and B */
2143   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2144 
2145   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2146   alg = flg ? 0 : 1;
2147 
2148   /* setup matrix product C */
2149   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2150   PetscCall(MatSetType(C, MATMPIDENSE));
2151   PetscCall(MatSetUp(C));
2152   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2153 
2154   /* create data structure for reuse C */
2155   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2156   PetscCallMPI(MPI_Comm_size(comm, &size));
2157   PetscCall(PetscNew(&abt));
2158   abt->tag = tag;
2159   abt->alg = alg;
2160   switch (alg) {
2161   case 1: /* alg: "cyclic" */
2162     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2163     bufsiz = A->cmap->N * maxRows;
2164     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2165     break;
2166   default: /* alg: "allgatherv" */
2167     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2168     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2169     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2170     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2171     break;
2172   }
2173 
2174   C->product->data                = abt;
2175   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2176   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2177   PetscFunctionReturn(PETSC_SUCCESS);
2178 }
2179 
2180 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2181 {
2182   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2183   Mat_MatTransMultDense *abt;
2184   MPI_Comm               comm;
2185   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2186   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2187   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2188   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2189   const PetscScalar     *av, *bv;
2190   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2191   MPI_Request            reqs[2];
2192   const PetscInt        *ranges;
2193 
2194   PetscFunctionBegin;
2195   MatCheckProduct(C, 3);
2196   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2197   abt = (Mat_MatTransMultDense *)C->product->data;
2198   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2199   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2200   PetscCallMPI(MPI_Comm_size(comm, &size));
2201   PetscCall(MatDenseGetArrayRead(a->A, &av));
2202   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2203   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2204   PetscCall(MatDenseGetLDA(a->A, &i));
2205   PetscCall(PetscBLASIntCast(i, &alda));
2206   PetscCall(MatDenseGetLDA(b->A, &i));
2207   PetscCall(PetscBLASIntCast(i, &blda));
2208   PetscCall(MatDenseGetLDA(c->A, &i));
2209   PetscCall(PetscBLASIntCast(i, &clda));
2210   PetscCall(MatGetOwnershipRanges(B, &ranges));
2211   bn = B->rmap->n;
2212   if (blda == bn) {
2213     sendbuf = (PetscScalar *)bv;
2214   } else {
2215     sendbuf = abt->buf[0];
2216     for (k = 0, i = 0; i < cK; i++) {
2217       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2218     }
2219   }
2220   if (size > 1) {
2221     sendto   = (rank + size - 1) % size;
2222     recvfrom = (rank + size + 1) % size;
2223   } else {
2224     sendto = recvfrom = 0;
2225   }
2226   PetscCall(PetscBLASIntCast(cK, &ck));
2227   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2228   recvisfrom = rank;
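  /* Ring algorithm: in each of the 'size' iterations this rank multiplies its local rows of A with the block
     of rows of B currently held in sendbuf (owned by rank 'recvisfrom'), filling the corresponding columns of
     the local block of C, while forwarding that block to rank-1 and receiving the next block from rank+1 */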
2229   for (i = 0; i < size; i++) {
2230     /* we have finished receiving and sending, bufs can be read/modified */
2231     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
2232     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2233 
2234     if (nextrecvisfrom != rank) {
2235       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2236       sendsiz = cK * bn;
2237       recvsiz = cK * nextbn;
2238       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2239       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2240       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2241     }
2242 
2243     /* local aseq * sendbuf^T */
2244     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2245     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2246 
2247     if (nextrecvisfrom != rank) {
2248       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2249       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2250     }
2251     bn         = nextbn;
2252     recvisfrom = nextrecvisfrom;
2253     sendbuf    = recvbuf;
2254   }
2255   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2256   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2257   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2258   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2259   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2260   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2261   PetscFunctionReturn(PETSC_SUCCESS);
2262 }
2263 
2264 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2265 {
2266   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2267   Mat_MatTransMultDense *abt;
2268   MPI_Comm               comm;
2269   PetscMPIInt            size;
2270   PetscScalar           *cv, *sendbuf, *recvbuf;
2271   const PetscScalar     *av, *bv;
2272   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2273   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2274   PetscBLASInt           cm, cn, ck, alda, clda;
2275 
2276   PetscFunctionBegin;
2277   MatCheckProduct(C, 3);
2278   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2279   abt = (Mat_MatTransMultDense *)C->product->data;
2280   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2281   PetscCallMPI(MPI_Comm_size(comm, &size));
2282   PetscCall(MatDenseGetArrayRead(a->A, &av));
2283   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2284   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2285   PetscCall(MatDenseGetLDA(a->A, &i));
2286   PetscCall(PetscBLASIntCast(i, &alda));
2287   PetscCall(MatDenseGetLDA(b->A, &blda));
2288   PetscCall(MatDenseGetLDA(c->A, &i));
2289   PetscCall(PetscBLASIntCast(i, &clda));
2290   /* copy transpose of B into buf[0] */
2291   bn      = B->rmap->n;
2292   sendbuf = abt->buf[0];
2293   recvbuf = abt->buf[1];
2294   for (k = 0, j = 0; j < bn; j++) {
2295     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2296   }
2297   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
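  /* gather the transposed local blocks of B from every rank so that recvbuf holds all of B^T, then form the
     local rows of C = A * B^T with a single GEMM */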
2298   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2299   PetscCall(PetscBLASIntCast(cK, &ck));
2300   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2301   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2302   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2303   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2305   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2306   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2307   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2308   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2309   PetscFunctionReturn(PETSC_SUCCESS);
2310 }
2311 
2312 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2313 {
2314   Mat_MatTransMultDense *abt;
2315 
2316   PetscFunctionBegin;
2317   MatCheckProduct(C, 3);
2318   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2319   abt = (Mat_MatTransMultDense *)C->product->data;
2320   switch (abt->alg) {
2321   case 1:
2322     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2323     break;
2324   default:
2325     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2326     break;
2327   }
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2332 {
2333   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2334 
2335   PetscFunctionBegin;
2336   PetscCall(MatDestroy(&ab->Ce));
2337   PetscCall(MatDestroy(&ab->Ae));
2338   PetscCall(MatDestroy(&ab->Be));
2339   PetscCall(PetscFree(ab));
2340   PetscFunctionReturn(PETSC_SUCCESS);
2341 }
2342 
2343 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2344 {
2345   Mat_MatMultDense *ab;
2346   Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
2347 
2348   PetscFunctionBegin;
2349   MatCheckProduct(C, 3);
2350   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2351   ab = (Mat_MatMultDense *)C->product->data;
2352   if (ab->Ae && ab->Ce) {
2353 #if PetscDefined(HAVE_ELEMENTAL)
2354     PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2355     PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2356     PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2357     PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2358 #else
2359     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2360 #endif
2361   } else {
2362     const PetscScalar *read;
2363     PetscScalar       *write;
2364     PetscInt           lda;
2365 
2366     PetscCall(MatDenseGetLDA(B, &lda));
2367     PetscCall(MatDenseGetArrayRead(B, &read));
2368     PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
2369     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
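    /* broadcast every column of B through the matrix-vector scatter so each rank obtains the full column in Be,
       then multiply by the local rows of A with a sequential dense product */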
2370     for (PetscInt i = 0; i < C->cmap->N; ++i) {
2371       PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2372       PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2373     }
2374     PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
2375     PetscCall(MatDenseRestoreArrayRead(B, &read));
2376     PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
2377   }
2378   PetscFunctionReturn(PETSC_SUCCESS);
2379 }
2380 
2381 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2382 {
2383   Mat_Product      *product = C->product;
2384   PetscInt          alg;
2385   Mat_MatMultDense *ab;
2386   PetscBool         flg;
2387 
2388   PetscFunctionBegin;
2389   MatCheckProduct(C, 4);
2390   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2391   /* check local size of A and B */
2392   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2393              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
2394 
2395   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2396   alg = flg ? 0 : 1;
2397 
2398   /* setup C */
2399   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2400   PetscCall(MatSetType(C, MATMPIDENSE));
2401   PetscCall(MatSetUp(C));
2402 
2403   /* create data structure for reuse Cdense */
2404   PetscCall(PetscNew(&ab));
2405 
2406   switch (alg) {
2407   case 1: /* alg: "elemental" */
2408 #if PetscDefined(HAVE_ELEMENTAL)
2409     /* create elemental matrices Ae and Be */
2410     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2411     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2412     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2413     PetscCall(MatSetUp(ab->Ae));
2414     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2415 
2416     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2417     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2418     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2419     PetscCall(MatSetUp(ab->Be));
2420     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2421 
2422     /* compute symbolic Ce = Ae*Be */
2423     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2424     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2425 #else
2426     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2427 #endif
2428     break;
2429   default: /* alg: "petsc" */
2430     ab->Ae = NULL;
2431     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2432     ab->Ce = NULL;
2433     break;
2434   }
2435 
2436   C->product->data       = ab;
2437   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2438   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2439   PetscFunctionReturn(PETSC_SUCCESS);
2440 }
2441 
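/*
  Editorial note: a minimal, hedged usage sketch of the path that reaches
  MatMatMultSymbolic_MPIDense_MPIDense() above. Both operands are MATMPIDENSE; the
  default "petsc" algorithm gathers B on every rank, while "elemental" (available when
  PETSc is configured with Elemental) can be selected with -mat_product_algorithm
  elemental. The matrix sizes are illustrative only.

.vb
  Mat A, B, C;
  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 50, 20, NULL, &B));
  // ... fill the local blocks of A and B ...
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); // symbolic + numeric
  PetscCall(MatMatMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, &C));   // reuses the product data created above
.ve
*/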
2442 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2443 {
2444   Mat_Product *product     = C->product;
2445   const char  *algTypes[2] = {"petsc", "elemental"};
2446   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2447   PetscBool    flg = PETSC_FALSE;
2448 
2449   PetscFunctionBegin;
2450   /* Set default algorithm */
2451   alg = 0; /* default is petsc */
2452   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2453   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2454 
2455   /* Get runtime option */
2456   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2457   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2458   PetscOptionsEnd();
2459   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2460 
2461   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2462   C->ops->productsymbolic = MatProductSymbolic_AB;
2463   PetscFunctionReturn(PETSC_SUCCESS);
2464 }
2465 
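/*
  Editorial note: a hedged sketch of the explicit MatProduct interface that invokes
  MatProductSetFromOptions_MPIDense_AB() above; it allows the algorithm to be chosen in
  code instead of (or before) the -mat_product_algorithm runtime option. The choice of
  "elemental" here is illustrative and only valid when PETSc is configured with Elemental.

.vb
  Mat C;
  PetscCall(MatProductCreate(A, B, NULL, &C));
  PetscCall(MatProductSetType(C, MATPRODUCT_AB));
  PetscCall(MatProductSetAlgorithm(C, "elemental")); // or "petsc" (the default)
  PetscCall(MatProductSetFromOptions(C));
  PetscCall(MatProductSymbolic(C));
  PetscCall(MatProductNumeric(C)); // may be called repeatedly when A or B change values
.ve
*/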
2466 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2467 {
2468   Mat_Product *product = C->product;
2469   Mat          A = product->A, B = product->B;
2470 
2471   PetscFunctionBegin;
2472   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2473              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2474   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2475   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2476   PetscFunctionReturn(PETSC_SUCCESS);
2477 }
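/*
  Editorial note: a hedged sketch of the user-level call that reaches
  MatProductSetFromOptions_MPIDense_AtB() above. C = A^T * B requires A and B to share
  the same row layout, which is exactly what the PetscCheck above enforces.

.vb
  Mat C;
  PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); // C = A^T B
.ve
*/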
2478 
2479 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2480 {
2481   Mat_Product *product     = C->product;
2482   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2483   PetscInt     alg, nalg = 2;
2484   PetscBool    flg = PETSC_FALSE;
2485 
2486   PetscFunctionBegin;
2487   /* Set default algorithm */
2488   alg = 0; /* default is allgatherv */
2489   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2490   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2491 
2492   /* Get runtime option */
2493   if (product->api_user) {
2494     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2495     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2496     PetscOptionsEnd();
2497   } else {
2498     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2499     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2500     PetscOptionsEnd();
2501   }
2502   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2503 
2504   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2505   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2506   PetscFunctionReturn(PETSC_SUCCESS);
2507 }
2508 
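/*
  Editorial note: a hedged sketch of the user-level call that reaches
  MatProductSetFromOptions_MPIDense_ABt() above. C = A * B^T with two MATMPIDENSE
  operands; the communication scheme is chosen with -mat_product_algorithm
  allgatherv|cyclic (or with -matmattransmult_mpidense_mpidense_via when going through
  the MatMatTransposeMult() interface, as indicated by product->api_user above).

.vb
  Mat C;
  PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); // C = A B^T
.ve
*/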
2509 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2510 {
2511   Mat_Product *product = C->product;
2512 
2513   PetscFunctionBegin;
2514   switch (product->type) {
2515   case MATPRODUCT_AB:
2516     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2517     break;
2518   case MATPRODUCT_AtB:
2519     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2520     break;
2521   case MATPRODUCT_ABt:
2522     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2523     break;
2524   default:
2525     break;
2526   }
2527   PetscFunctionReturn(PETSC_SUCCESS);
2528 }
2529