xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4cc2b5b5fe4ffd09e5956b56d7cdc4f43e324103)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13   matrix that represents the operator. For sequential matrices it returns itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
/* Return the square diagonal block of A owned by this process, caching it on A
   under the key "DiagonalBlock" so repeated calls return the same object */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    /* build a sequential dense matrix that aliases the diagonal columns of the
       local array; NOTE(review): the offset m*rstart assumes the local leading
       dimension equals m — confirm for user-set LDA */
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing gives A a reference; drop ours so B is destroyed with A */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
129 
/* Insert/add a logically dense sub-block of values: locally owned rows go
   straight into the local sequential block; off-process rows are stashed and
   communicated at assembly time (unless stashing is disabled) */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) { /* locally owned row */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
      } else {
        /* v is column-major: entry (i,j) is at v + i + j*m; negative columns skipped */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: buffer it in the stash; matrix needs reassembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/* Set the leading dimension of the local storage, creating the inner
   sequential matrix lazily if preallocation has not happened yet */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    /* no inner matrix yet: create an empty local block of the right shape */
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds every global column for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
/* Extract the submatrix A(isrow, iscol). The column IS is gathered onto every
   process (dense storage holds all columns locally), while rows must keep the
   original partitioning — no parallel redistribution is supported */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* source is column-major with leading dimension lda; the destination pointer
     bv walks the new matrix contiguously (NOTE(review): assumes the new local
     block has lda == nrows — holds for the freshly preallocated matrix) */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
/* Finish assembly: drain the stashed off-process entries into the locally
   owned rows, then assemble the inner sequential block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* delegate the rest of the assembly to the local sequential matrix */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
/* Zero the listed rows (global indices, only those owned locally take effect),
   optionally fixing the right-hand side b = diag*x on those rows and placing
   diag on the diagonal */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block without inserting any diagonal ... */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* ... then set the diagonal in one collective pass; note this writes diag
       on every diagonal entry of A, not only the zeroed rows */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
465 
466 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
470 
/* yy = A*xx: broadcast the needed entries of xx into the local work vector via
   the PetscSF, then apply the local sequential mat-vec */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication pattern */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host or device memory both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
489 
/* Multiply-add restricted to a column range: delegates to the sequential
   "MatMultAddColumnRange_C" method after gathering xx into the work vector */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication pattern */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host or device memory both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
508 
/* zz = yy + A*xx: gather xx into the work vector, then apply the local
   sequential multiply-add */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication pattern */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host or device memory both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
527 
/* Hermitian-transpose product restricted to a column range: the local product
   lands in lvec, then per-process contributions are summed into yy via the SF */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
  PetscCall(VecSet(yy, 0.0)); /* the reduce below adds into yy, so start from zero */
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  /* memtype-aware reduction so host or device memory both work */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
547 
/* Shared kernel for transpose products: yy = A^T xx (herm == PETSC_FALSE) or
   yy = A^H xx (herm == PETSC_TRUE). The local product lands in lvec; the
   per-process contributions are then summed into the distributed yy */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
  PetscCall(VecSet(yy, 0.0)); /* the reduce below adds into yy, so start from zero */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  /* memtype-aware reduction so host or device memory both work */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
568 
569 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
570 {
571   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
572   const PetscScalar *ax;
573   PetscScalar       *ay;
574   PetscMemType       axmtype, aymtype;
575 
576   PetscFunctionBegin;
577   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
578   PetscCall(VecCopy(yy, zz));
579   PetscMPIInt rank;
580   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
581   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
582   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
583   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
584   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
585   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
586   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
587   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590 
/* Shared kernel for transpose multiply-add: zz = yy + A^T xx (herm == PETSC_FALSE)
   or zz = yy + A^H xx (herm == PETSC_TRUE) */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
  PetscCall(VecCopy(yy, zz)); /* the reduce below accumulates into zz, so start from yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  /* memtype-aware reduction so host or device memory both work */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
611 
/* yy = A^T xx: plain-transpose front end of the shared kernel */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
618 
/* zz = yy + A^T xx: plain-transpose front end of the shared add kernel */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
625 
/* yy = A^H xx: Hermitian-transpose front end of the shared kernel */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
632 
/* zz = yy + A^H xx: Hermitian-transpose front end of the shared add kernel */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
639 
640 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
641 {
642   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
643   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
644   PetscScalar       *x;
645   const PetscScalar *av;
646 
647   PetscFunctionBegin;
648   PetscCall(VecGetArray(v, &x));
649   PetscCall(VecGetSize(v, &ng));
650   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
651   PetscCall(VecGetLocalSize(v, &nl));
652   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
653   radd = A->rmap->rstart * m;
654   PetscCall(MatDenseGetArrayRead(a->A, &av));
655   PetscCall(MatDenseGetLDA(a->A, &lda));
656   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
657   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
658   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
659   PetscCall(VecRestoreArray(v, &x));
660   PetscFunctionReturn(PETSC_SUCCESS);
661 }
662 
663 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
664 {
665   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
666 
667   PetscFunctionBegin;
668   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
669   PetscCall(MatStashDestroy_Private(&mat->stash));
670   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
671   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
672   PetscCall(MatDestroy(&mdn->A));
673   PetscCall(VecDestroy(&mdn->lvec));
674   PetscCall(PetscSFDestroy(&mdn->Mvctx));
675   PetscCall(VecDestroy(&mdn->cvec));
676   PetscCall(MatDestroy(&mdn->cmat));
677 
678   PetscCall(PetscFree(mat->data));
679   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
680 
681   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
682   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
683   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
684   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
685   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
686   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
687   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
688   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
689   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
690   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
691   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
692   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
693   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
694 #if defined(PETSC_HAVE_ELEMENTAL)
695   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
696 #endif
697 #if defined(PETSC_HAVE_SCALAPACK)
698   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
699 #endif
700   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
701   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
702   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
703 #if defined(PETSC_HAVE_CUDA)
704   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
705   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
706   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
707   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
708   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
709   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
710   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
711   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
712   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
713   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
714   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
715   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
721   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
722 #endif
723 #if defined(PETSC_HAVE_HIP)
724   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
725   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
726   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
727   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
728   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
729   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
730   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
731   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
732   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
733   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
734   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
735   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
736   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
737   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
738   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
739   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
740   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
741   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
742 #endif
743   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
744   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
745   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
746   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
747   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
748   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
749   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
750   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
751   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
752   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
756 
757   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
761 #include <petscdraw.h>
762 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
763 {
764   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
765   PetscMPIInt       rank;
766   PetscViewerType   vtype;
767   PetscBool         iascii, isdraw;
768   PetscViewer       sviewer;
769   PetscViewerFormat format;
770 
771   PetscFunctionBegin;
772   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
773   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
774   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
775   if (iascii) {
776     PetscCall(PetscViewerGetType(viewer, &vtype));
777     PetscCall(PetscViewerGetFormat(viewer, &format));
778     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
779       MatInfo info;
780       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
781       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
782       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
783                                                    (PetscInt)info.memory));
784       PetscCall(PetscViewerFlush(viewer));
785       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
786       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
787       PetscFunctionReturn(PETSC_SUCCESS);
788     } else if (format == PETSC_VIEWER_ASCII_INFO) {
789       PetscFunctionReturn(PETSC_SUCCESS);
790     }
791   } else if (isdraw) {
792     PetscDraw draw;
793     PetscBool isnull;
794 
795     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
796     PetscCall(PetscDrawIsNull(draw, &isnull));
797     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
798   }
799 
800   {
801     /* assemble the entire matrix onto first processor. */
802     Mat          A;
803     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
804     PetscInt    *cols;
805     PetscScalar *vals;
806 
807     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
808     if (rank == 0) {
809       PetscCall(MatSetSizes(A, M, N, M, N));
810     } else {
811       PetscCall(MatSetSizes(A, 0, 0, M, N));
812     }
813     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
814     PetscCall(MatSetType(A, MATMPIDENSE));
815     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
816 
817     /* Copy the matrix ... This isn't the most efficient means,
818        but it's quick for now */
819     A->insertmode = INSERT_VALUES;
820 
821     row = mat->rmap->rstart;
822     m   = mdn->A->rmap->n;
823     for (i = 0; i < m; i++) {
824       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
825       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
826       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
827       row++;
828     }
829 
830     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
831     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
832     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
833     if (rank == 0) {
834       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
835       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
836     }
837     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
838     PetscCall(MatDestroy(&A));
839   }
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
844 {
845   PetscBool iascii, isbinary, isdraw, issocket;
846 
847   PetscFunctionBegin;
848   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
850   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
852 
853   if (iascii || issocket || isdraw) {
854     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
855   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
856   PetscFunctionReturn(PETSC_SUCCESS);
857 }
858 
859 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
860 {
861   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
862   Mat            mdn = mat->A;
863   PetscLogDouble isend[5], irecv[5];
864 
865   PetscFunctionBegin;
866   info->block_size = 1.0;
867 
868   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
869 
870   isend[0] = info->nz_used;
871   isend[1] = info->nz_allocated;
872   isend[2] = info->nz_unneeded;
873   isend[3] = info->memory;
874   isend[4] = info->mallocs;
875   if (flag == MAT_LOCAL) {
876     info->nz_used      = isend[0];
877     info->nz_allocated = isend[1];
878     info->nz_unneeded  = isend[2];
879     info->memory       = isend[3];
880     info->mallocs      = isend[4];
881   } else if (flag == MAT_GLOBAL_MAX) {
882     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
883 
884     info->nz_used      = irecv[0];
885     info->nz_allocated = irecv[1];
886     info->nz_unneeded  = irecv[2];
887     info->memory       = irecv[3];
888     info->mallocs      = irecv[4];
889   } else if (flag == MAT_GLOBAL_SUM) {
890     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
891 
892     info->nz_used      = irecv[0];
893     info->nz_allocated = irecv[1];
894     info->nz_unneeded  = irecv[2];
895     info->memory       = irecv[3];
896     info->mallocs      = irecv[4];
897   }
898   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
899   info->fill_ratio_needed = 0;
900   info->factor_mallocs    = 0;
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
/*
  MatSetOption_MPIDense - Applies a matrix option to the parallel dense matrix.

  Options affecting insertion semantics are forwarded to the local sequential
  block; row orientation and stash behavior are also recorded on the parallel
  wrapper itself.  Structure/symmetry hints are ignored (dense storage does not
  benefit from them), and unrecognized options are an error.
*/
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that only concern the local block: forward them */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* recorded on the wrapper and forwarded to the local block */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* irrelevant for dense storage; note and ignore */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* controls whether off-process entries are stashed during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
947 
/*
  MatDiagonalScale_MPIDense - Computes A <- diag(ll) * A * diag(rr) in place.

  ll is distributed like the rows of A, so the left scaling is purely local.
  rr is distributed like the columns of A; its entries are first gathered into
  the local work vector lvec through the Mvctx star forest, then applied column
  by column.  Either vector may be NULL to skip that side of the scaling.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* scale row i by l[i]; in the column-major local array the entries of a row are lda apart */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the column-communication star forest, then gather the needed rr entries into lvec */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* scale column i by r[i]; columns are contiguous runs of length m starting at i*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
996 
/*
  MatNorm_MPIDense - Computes the Frobenius, 1-, or infinity-norm of the matrix.

  On a single rank the request is delegated to the sequential block.  In
  parallel, partial results are combined with an Allreduce: Frobenius sums
  squared moduli, the 1-norm sums per-column absolute values then takes the
  max over global columns, and the infinity-norm takes the max of the local
  row norms (rows are not split across ranks).  The 2-norm is unsupported.

  NOTE(review): the Frobenius and 1-norm loops walk the local array as a dense
  m*n contiguous block — this assumes lda equals the local row count; verify
  for matrices whose local block has padding.
*/
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 locally, reduce, then take the square root */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* per-column absolute sums over all global columns, then the max */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1046 
/*
  MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

  For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a fresh matrix with the row and
  column layouts swapped is created; for MAT_REUSE_MATRIX the caller-supplied
  *matout is filled.  Each local column of A becomes one row of B, inserted via
  MatSetValues so off-process destinations are handled by the assembly stash.
  In the in-place case the new matrix replaces A's internals via header merge.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* transpose swaps both the local and the global row/column sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices of this rank's rows of A,
     which become the global column indices in B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of the local block (contiguous, length m) becomes global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B, destroying the old data */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1085 
/* Forward declarations for routines referenced before their definitions in this file */
static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1088 
1089 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1090 {
1091   PetscFunctionBegin;
1092   PetscCall(PetscLayoutSetUp(A->rmap));
1093   PetscCall(PetscLayoutSetUp(A->cmap));
1094   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1095   PetscFunctionReturn(PETSC_SUCCESS);
1096 }
1097 
1098 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1099 {
1100   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1101 
1102   PetscFunctionBegin;
1103   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1108 {
1109   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1110 
1111   PetscFunctionBegin;
1112   PetscCall(MatConjugate(a->A));
1113   PetscFunctionReturn(PETSC_SUCCESS);
1114 }
1115 
1116 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1117 {
1118   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1119 
1120   PetscFunctionBegin;
1121   PetscCall(MatRealPart(a->A));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1126 {
1127   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1128 
1129   PetscFunctionBegin;
1130   PetscCall(MatImaginaryPart(a->A));
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1135 {
1136   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1137 
1138   PetscFunctionBegin;
1139   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1140   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1141   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1146 
/*
  MatGetColumnReductions_MPIDense - Computes a per-column reduction (norm, sum,
  or mean) over the whole parallel matrix, returning one value per global column.

  `type` mixes NormType and MatReductionType values (they share an integer
  space by design in this API).  The local sequential kernel produces partial
  results, which are combined across ranks with MPI_MAX for the infinity norm
  and MPI_SUM for everything else.  NORM_2 partials are squared before the sum
  and square-rooted afterwards; mean reductions are computed as global sums
  divided by the global row count.
*/
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  /* means are computed as sums first; the division by m happens after the reduction */
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  /* 2-norm partials must be squared so that summing across ranks is correct */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1179 
1180 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1181 {
1182   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1183 
1184   PetscFunctionBegin;
1185   PetscCall(MatSetRandom(d->A, rctx));
1186 #if defined(PETSC_HAVE_DEVICE)
1187   x->offloadmask = d->A->offloadmask;
1188 #endif
1189   PetscFunctionReturn(PETSC_SUCCESS);
1190 }
1191 
1192 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1193 {
1194   PetscFunctionBegin;
1195   *missing = PETSC_FALSE;
1196   PetscFunctionReturn(PETSC_SUCCESS);
1197 }
1198 
/* Forward declarations for entries of the MatOps table defined below */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1206 
/*
  Function table for MATMPIDENSE.  Entries are positional (the slot index is the
  operation id in struct _MatOps); the numeric comments mark every fifth slot.
  NULL means the operation is unsupported or inherited/dispatched elsewhere
  (e.g. via composed functions registered at creation time).
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL};
1361 
/*
  MatMPIDenseSetPreallocation_MPIDense - Allocates the local dense block.

  Each rank stores its mat->rmap->n local rows times all mat->cmap->N global
  columns as one sequential dense matrix a->A.  If `data` is non-NULL the
  caller-provided (column-major) array is used as storage; otherwise PETSc
  allocates it.  The sequential type is chosen to match the parallel type
  (CUDA/HIP variants when available).  On return the matrix is marked both
  preallocated and assembled.
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block: all global columns of this rank's rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* mirror the local block's host/device residency on the wrapper */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1394 
/* Convert a parallel AIJ matrix to parallel dense: densify the merged local part on each
   rank, then concatenate the sequential pieces into one MPI dense matrix.
   Handles all three MatReuse modes; MAT_INPLACE_MATRIX replaces A's internals via MatHeaderReplace(). */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* merge diagonal and off-diagonal blocks into one sequential AIJ, then densify it */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  /* reuse the caller's matrix as the concatenation target when requested */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1413 
1414 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1415 {
1416   Mat B, C;
1417 
1418   PetscFunctionBegin;
1419   PetscCall(MatDenseGetLocalMatrix(A, &C));
1420   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1421   if (reuse == MAT_REUSE_MATRIX) {
1422     C = *newmat;
1423   } else C = NULL;
1424   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1425   PetscCall(MatDestroy(&B));
1426   if (reuse == MAT_INPLACE_MATRIX) {
1427     PetscCall(MatHeaderReplace(A, &C));
1428   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1429   PetscFunctionReturn(PETSC_SUCCESS);
1430 }
1431 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format by inserting each rank's local rows
   into a freshly created (or reused, zeroed) MATELEMENTAL matrix.
   Handles all three MatReuse modes; MAT_INPLACE_MATRIX replaces A's internals. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* entries are ADDed below, so the reused target must start from zero */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* values below are passed column by column */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* index arrays: all global columns, and this rank's contiguous global rows */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* when the local storage is contiguous (lda == m) one call suffices; otherwise insert per column */
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1476 
1477 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1478 {
1479   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1480 
1481   PetscFunctionBegin;
1482   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1483   PetscFunctionReturn(PETSC_SUCCESS);
1484 }
1485 
1486 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1487 {
1488   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1489 
1490   PetscFunctionBegin;
1491   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1492   PetscFunctionReturn(PETSC_SUCCESS);
1493 }
1494 
1495 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1496 {
1497   Mat_MPIDense *mat;
1498   PetscInt      m, nloc, N;
1499 
1500   PetscFunctionBegin;
1501   PetscCall(MatGetSize(inmat, &m, &N));
1502   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1503   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1504     PetscInt sum;
1505 
1506     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1507     /* Check sum(n) = N */
1508     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1509     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1510 
1511     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1512     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1513   }
1514 
1515   /* numeric phase */
1516   mat = (Mat_MPIDense *)(*outmat)->data;
1517   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1518   PetscFunctionReturn(PETSC_SUCCESS);
1519 }
1520 
/* Expose global column col of A as a (cached, shared) parallel Vec with read-write access.
   Only one column vector may be outstanding at a time (tracked via a->vecinuse);
   the caller must pair this with MatDenseRestoreColumnVec(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the cached column vector on first use */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* store col+1 so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the vector at column col of the (column-major, lda-strided) local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1537 
1538 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1539 {
1540   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1541 
1542   PetscFunctionBegin;
1543   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1544   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1545   a->vecinuse = 0;
1546   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1547   PetscCall(VecResetArray(a->cvec));
1548   if (v) *v = NULL;
1549   PetscFunctionReturn(PETSC_SUCCESS);
1550 }
1551 
/* Expose global column col of A as a (cached, shared) parallel Vec with read-only access.
   Same protocol as MatDenseGetColumnVec_MPIDense(), but takes a read-only array reference
   and pushes a read lock on the vector. Pair with MatDenseRestoreColumnVecRead(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the cached column vector on first use */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* store col+1 so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  /* point the vector at column col of the (column-major, lda-strided) local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1569 
/* Release the read-only column vector from MatDenseGetColumnVecRead_MPIDense():
   restore the read array reference, pop the read lock, detach the borrowed storage,
   and clear the in-use flag. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1584 
/* Expose global column col of A as a (cached, shared) parallel Vec with write-only access.
   Same protocol as MatDenseGetColumnVec_MPIDense(), but takes a write-only array
   reference. Pair with MatDenseRestoreColumnVecWrite(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the cached column vector on first use */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* store col+1 so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the vector at column col of the (column-major, lda-strided) local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1601 
/* Release the write-only column vector from MatDenseGetColumnVecWrite_MPIDense():
   restore the write array reference, detach the borrowed storage, and clear the
   in-use flag. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1615 
/* Expose the submatrix A[rbegin:rend, cbegin:cend] as a (cached, shared) MPI dense matrix
   that aliases A's storage. The cached matrix a->cmat and its layouts are created on first
   use and rebuilt only when the requested sizes change. Only one submatrix may be
   outstanding at a time (tracked via a->matinuse); pair with MatDenseRestoreSubMatrix(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range against this rank's ownership range,
     yielding local row indices [pbegin, pend) into the inner matrix */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached submatrix shell and set up its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached matrix exists: rebuild layouts only if the requested sizes changed.
       The local-size comparison must agree on all ranks, hence the allreduce. */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the inner matrix of the submatrix aliases the corresponding block of a->A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* store cbegin+1 so 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1673 
/* Release the submatrix handed out by MatDenseGetSubMatrix_MPIDense(): return the aliased
   inner block to a->A, clear the in-use flag, and NULL the caller's handle. The cached
   outer shell a->cmat is kept for reuse. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the submatrix may have moved data between host and device; resync A's mask */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1692 
1693 /*MC
1694    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1695 
1696    Options Database Key:
1697 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1698 
1699   Level: beginner
1700 
1701 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1702 M*/
/* Constructor for MATMPIDENSE: allocate the per-matrix data structure, install the
   function table, create the stash used for off-process MatSetValues(), and register
   all type-specific method and conversion hooks via PetscObjectComposeFunction(). */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific accessors (array, LDA, column vectors, submatrices) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  /* preallocation and matrix-product hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1774 
1775 /*MC
1776    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1777 
1778    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1779    and `MATMPIDENSE` otherwise.
1780 
1781    Options Database Key:
1782 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1783 
1784   Level: beginner
1785 
1786 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1787 M*/
1788 
1789 /*@
1790   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1791 
1792   Collective
1793 
1794   Input Parameters:
1795 + B    - the matrix
1796 - data - optional location of matrix data.  Set to `NULL` for PETSc
1797          to control all matrix memory allocation.
1798 
1799   Level: intermediate
1800 
1801   Notes:
1802   The dense format is fully compatible with standard Fortran
1803   storage by columns.
1804 
1805   The data input variable is intended primarily for Fortran programmers
1806   who wish to allocate their own matrix memory space.  Most users should
1807   set `data` to `NULL`.
1808 
1809 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1810 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the type-specific implementation; silently a no-op for matrix
     types that do not compose "MatMPIDenseSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1818 
1819 /*@
1820   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1821   array provided by the user. This is useful to avoid copying an array
1822   into a matrix
1823 
1824   Not Collective
1825 
1826   Input Parameters:
1827 + mat   - the matrix
1828 - array - the array in column major order
1829 
1830   Level: developer
1831 
1832   Note:
1833   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1834   freed when the matrix is destroyed.
1835 
1836 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1837           `MatDenseReplaceArray()`
1838 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the type did not compose "MatDensePlaceArray_C" */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the placed array is host memory, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1850 
1851 /*@
1852   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1853 
1854   Not Collective
1855 
1856   Input Parameter:
1857 . mat - the matrix
1858 
1859   Level: developer
1860 
1861   Note:
1862   You can only call this after a call to `MatDensePlaceArray()`
1863 
1864 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1865 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the type did not compose "MatDenseResetArray_C" */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1874 
1875 /*@
1876   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1877   array provided by the user. This is useful to avoid copying an array
1878   into a matrix
1879 
1880   Not Collective
1881 
1882   Input Parameters:
1883 + mat   - the matrix
1884 - array - the array in column major order
1885 
1886   Level: developer
1887 
1888   Note:
1889   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1890   freed by the user. It will be freed when the matrix is destroyed.
1891 
1892 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1893 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the type did not compose "MatDenseReplaceArray_C" */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array is host memory, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1905 
1906 /*@
1907   MatCreateDense - Creates a matrix in `MATDENSE` format.
1908 
1909   Collective
1910 
1911   Input Parameters:
1912 + comm - MPI communicator
1913 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1914 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1915 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1916 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1917 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1918    to control all matrix memory allocation.
1919 
1920   Output Parameter:
1921 . A - the matrix
1922 
1923   Level: intermediate
1924 
1925   Notes:
1926   The dense format is fully compatible with standard Fortran
1927   storage by columns.
1928 
1929   Although local portions of the matrix are stored in column-major
1930   order, the matrix is partitioned across MPI ranks by row.
1931 
1932   The data input variable is intended primarily for Fortran programmers
1933   who wish to allocate their own matrix memory space.  Most users should
1934   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1935 
1936   The user MUST specify either the local or global matrix dimensions
1937   (possibly both).
1938 
1939 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1940 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* *A resolved to either seq or mpi dense depending on comm size; both preallocation
     calls are made and the one not matching the actual type is a no-op (Try-method dispatch) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1951 
/* Duplicate a parallel dense matrix: create a new shell of the same type and layout,
   copy the bookkeeping flags, and duplicate the inner sequential matrix (copying values
   or not according to cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  /* the inner sequential matrix carries the actual values */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1979 
/* Load a parallel dense matrix from a viewer; binary and (when available) HDF5
   viewers are supported, anything else is an error. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2005 
2006 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2007 {
2008   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2009   Mat           a, b;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A;
2013   b = matB->A;
2014   PetscCall(MatEqual(a, b, flag));
2015   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2016   PetscFunctionReturn(PETSC_SUCCESS);
2017 }
2018 
2019 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2020 {
2021   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2022 
2023   PetscFunctionBegin;
2024   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2025   PetscCall(MatDestroy(&atb->atb));
2026   PetscCall(PetscFree(atb));
2027   PetscFunctionReturn(PETSC_SUCCESS);
2028 }
2029 
2030 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2031 {
2032   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2033 
2034   PetscFunctionBegin;
2035   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2036   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2037   PetscCall(PetscFree(abt));
2038   PetscFunctionReturn(PETSC_SUCCESS);
2039 }
2040 
/* Numeric phase of C = A^T * B for parallel dense matrices: each rank forms the local
   product aseq^T * bseq, packs it by destination rank, and a reduce-scatter sums and
   distributes the row blocks of C. Uses the context set up in the symbolic phase. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its rows of every
     column, contiguously, in the order MPI_Reduce_scatter expects */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2086 
2087 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2088 {
2089   MPI_Comm               comm;
2090   PetscMPIInt            size;
2091   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2092   Mat_TransMatMultDense *atb;
2093   PetscBool              cisdense = PETSC_FALSE;
2094   PetscInt               i;
2095   const PetscInt        *ranges;
2096 
2097   PetscFunctionBegin;
2098   MatCheckProduct(C, 4);
2099   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2100   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2101   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2102     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2103   }
2104 
2105   /* create matrix product C */
2106   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2107 #if defined(PETSC_HAVE_CUDA)
2108   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2109 #endif
2110 #if defined(PETSC_HAVE_HIP)
2111   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2112 #endif
2113   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2114   PetscCall(MatSetUp(C));
2115 
2116   /* create data structure for reuse C */
2117   PetscCallMPI(MPI_Comm_size(comm, &size));
2118   PetscCall(PetscNew(&atb));
2119   cM = C->rmap->N;
2120   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2121   PetscCall(MatGetOwnershipRanges(C, &ranges));
2122   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2123 
2124   C->product->data    = atb;
2125   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2130 {
2131   MPI_Comm               comm;
2132   PetscMPIInt            i, size;
2133   PetscInt               maxRows, bufsiz;
2134   PetscMPIInt            tag;
2135   PetscInt               alg;
2136   Mat_MatTransMultDense *abt;
2137   Mat_Product           *product = C->product;
2138   PetscBool              flg;
2139 
2140   PetscFunctionBegin;
2141   MatCheckProduct(C, 4);
2142   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2143   /* check local size of A and B */
2144   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2145 
2146   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2147   alg = flg ? 0 : 1;
2148 
2149   /* setup matrix product C */
2150   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2151   PetscCall(MatSetType(C, MATMPIDENSE));
2152   PetscCall(MatSetUp(C));
2153   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2154 
2155   /* create data structure for reuse C */
2156   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2157   PetscCallMPI(MPI_Comm_size(comm, &size));
2158   PetscCall(PetscNew(&abt));
2159   abt->tag = tag;
2160   abt->alg = alg;
2161   switch (alg) {
2162   case 1: /* alg: "cyclic" */
2163     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2164     bufsiz = A->cmap->N * maxRows;
2165     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2166     break;
2167   default: /* alg: "allgatherv" */
2168     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2169     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2170     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2171     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2172     break;
2173   }
2174 
2175   C->product->data                = abt;
2176   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2177   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2178   PetscFunctionReturn(PETSC_SUCCESS);
2179 }
2180 
/* Numeric phase of C = A*B^T using the "cyclic" algorithm: row blocks of B
   circulate around the ring of ranks while each rank multiplies its local A
   against the block currently in hand, overlapping communication (Isend/Irecv)
   with the local GEMM.  The two ping-pong buffers abt->buf[0]/buf[1] alternate
   roles as send and receive buffers each iteration. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local blocks, narrowed to BLAS ints */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send directly from bv, no packing needed */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack local B (dropping the lda padding) into buf[0] for sending */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send "down", receive from "up" */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the block currently in sendbuf originated on this rank */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate ping-pong buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    /* result lands in the columns of C owned (as rows of B) by recvisfrom */
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* everything was computed locally; no off-process stashing required */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2264 
2265 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2266 {
2267   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2268   Mat_MatTransMultDense *abt;
2269   MPI_Comm               comm;
2270   PetscMPIInt            size;
2271   PetscScalar           *cv, *sendbuf, *recvbuf;
2272   const PetscScalar     *av, *bv;
2273   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2274   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2275   PetscBLASInt           cm, cn, ck, alda, clda;
2276 
2277   PetscFunctionBegin;
2278   MatCheckProduct(C, 3);
2279   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2280   abt = (Mat_MatTransMultDense *)C->product->data;
2281   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2282   PetscCallMPI(MPI_Comm_size(comm, &size));
2283   PetscCall(MatDenseGetArrayRead(a->A, &av));
2284   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2285   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2286   PetscCall(MatDenseGetLDA(a->A, &i));
2287   PetscCall(PetscBLASIntCast(i, &alda));
2288   PetscCall(MatDenseGetLDA(b->A, &blda));
2289   PetscCall(MatDenseGetLDA(c->A, &i));
2290   PetscCall(PetscBLASIntCast(i, &clda));
2291   /* copy transpose of B into buf[0] */
2292   bn      = B->rmap->n;
2293   sendbuf = abt->buf[0];
2294   recvbuf = abt->buf[1];
2295   for (k = 0, j = 0; j < bn; j++) {
2296     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2297   }
2298   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2299   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2300   PetscCall(PetscBLASIntCast(cK, &ck));
2301   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2302   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2303   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2304   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2305   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2306   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2307   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2308   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2309   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2310   PetscFunctionReturn(PETSC_SUCCESS);
2311 }
2312 
2313 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2314 {
2315   Mat_MatTransMultDense *abt;
2316 
2317   PetscFunctionBegin;
2318   MatCheckProduct(C, 3);
2319   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2320   abt = (Mat_MatTransMultDense *)C->product->data;
2321   switch (abt->alg) {
2322   case 1:
2323     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2324     break;
2325   default:
2326     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2327     break;
2328   }
2329   PetscFunctionReturn(PETSC_SUCCESS);
2330 }
2331 
2332 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2333 {
2334   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2335 
2336   PetscFunctionBegin;
2337   PetscCall(MatDestroy(&ab->Ce));
2338   PetscCall(MatDestroy(&ab->Ae));
2339   PetscCall(MatDestroy(&ab->Be));
2340   PetscCall(PetscFree(ab));
2341   PetscFunctionReturn(PETSC_SUCCESS);
2342 }
2343 
/* Numeric phase of C = A*B for MPI dense matrices.  Two paths, selected by the
   symbolic phase: the Elemental path (ab->Ae and ab->Ce set) converts A and B to
   Elemental format and multiplies there; the "petsc" path gathers the needed rows
   of B into the sequential ab->Be via A's column scatter (Mvctx) and multiplies
   locally. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* refresh the Elemental copies of A and B, multiply, and convert the result back into C */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;  /* B's local array (read-only) */
    PetscScalar       *write; /* destination: sequential Be holding the gathered B rows */
    PetscInt           lda;   /* leading dimension of B's local array */

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* broadcast each column of B through A's column scatter into the matching column of Be */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* local rows of C = local A times the gathered Be */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2381 
2382 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2383 {
2384   Mat_Product      *product = C->product;
2385   PetscInt          alg;
2386   Mat_MatMultDense *ab;
2387   PetscBool         flg;
2388 
2389   PetscFunctionBegin;
2390   MatCheckProduct(C, 4);
2391   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2392   /* check local size of A and B */
2393   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2394              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2395 
2396   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2397   alg = flg ? 0 : 1;
2398 
2399   /* setup C */
2400   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2401   PetscCall(MatSetType(C, MATMPIDENSE));
2402   PetscCall(MatSetUp(C));
2403 
2404   /* create data structure for reuse Cdense */
2405   PetscCall(PetscNew(&ab));
2406 
2407   switch (alg) {
2408   case 1: /* alg: "elemental" */
2409 #if PetscDefined(HAVE_ELEMENTAL)
2410     /* create elemental matrices Ae and Be */
2411     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2412     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2413     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2414     PetscCall(MatSetUp(ab->Ae));
2415     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2416 
2417     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2418     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2419     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2420     PetscCall(MatSetUp(ab->Be));
2421     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2422 
2423     /* compute symbolic Ce = Ae*Be */
2424     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2425     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2426 #else
2427     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2428 #endif
2429     break;
2430   default: /* alg: "petsc" */
2431     ab->Ae = NULL;
2432     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2433     ab->Ce = NULL;
2434     break;
2435   }
2436 
2437   C->product->data       = ab;
2438   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2439   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2440   PetscFunctionReturn(PETSC_SUCCESS);
2441 }
2442 
2443 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2444 {
2445   Mat_Product *product     = C->product;
2446   const char  *algTypes[2] = {"petsc", "elemental"};
2447   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2448   PetscBool    flg = PETSC_FALSE;
2449 
2450   PetscFunctionBegin;
2451   /* Set default algorithm */
2452   alg = 0; /* default is petsc */
2453   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2454   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2455 
2456   /* Get runtime option */
2457   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2458   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2459   PetscOptionsEnd();
2460   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2461 
2462   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2463   C->ops->productsymbolic = MatProductSymbolic_AB;
2464   PetscFunctionReturn(PETSC_SUCCESS);
2465 }
2466 
2467 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2468 {
2469   Mat_Product *product = C->product;
2470   Mat          A = product->A, B = product->B;
2471 
2472   PetscFunctionBegin;
2473   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2474              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2475   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2476   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2477   PetscFunctionReturn(PETSC_SUCCESS);
2478 }
2479 
2480 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2481 {
2482   Mat_Product *product     = C->product;
2483   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2484   PetscInt     alg, nalg = 2;
2485   PetscBool    flg = PETSC_FALSE;
2486 
2487   PetscFunctionBegin;
2488   /* Set default algorithm */
2489   alg = 0; /* default is allgatherv */
2490   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2491   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2492 
2493   /* Get runtime option */
2494   if (product->api_user) {
2495     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2496     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2497     PetscOptionsEnd();
2498   } else {
2499     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2500     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2501     PetscOptionsEnd();
2502   }
2503   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2504 
2505   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2506   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2507   PetscFunctionReturn(PETSC_SUCCESS);
2508 }
2509 
2510 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2511 {
2512   Mat_Product *product = C->product;
2513 
2514   PetscFunctionBegin;
2515   switch (product->type) {
2516   case MATPRODUCT_AB:
2517     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2518     break;
2519   case MATPRODUCT_AtB:
2520     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2521     break;
2522   case MATPRODUCT_ABt:
2523     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2524     break;
2525   default:
2526     break;
2527   }
2528   PetscFunctionReturn(PETSC_SUCCESS);
2529 }
2530