xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 174dc0c8cee294b82b85e4dd3b331b29396264fc)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 #include <petscdevice.h>
12 #include <petsc/private/deviceimpl.h>
13 
14 /*@
15   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
16   matrix that represents the operator. For sequential matrices it returns itself.
17 
18   Input Parameter:
19 . A - the sequential or MPI `MATDENSE` matrix
20 
21   Output Parameter:
22 . B - the inner matrix
23 
24   Level: intermediate
25 
26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
27 @*/
28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
29 {
30   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
31   PetscBool     flg;
32 
33   PetscFunctionBegin;
34   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
35   PetscAssertPointer(B, 2);
36   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
37   if (flg) *B = mat->A;
38   else {
39     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
40     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
41     *B = A;
42   }
43   PetscFunctionReturn(PETSC_SUCCESS);
44 }
45 
46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
47 {
48   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
49   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
50 
51   PetscFunctionBegin;
52   PetscCall(MatCopy(Amat->A, Bmat->A, s));
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
57 {
58   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
59   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
60   PetscScalar  *v;
61 
62   PetscFunctionBegin;
63   PetscCall(MatDenseGetArray(mat->A, &v));
64   PetscCall(MatDenseGetLDA(mat->A, &lda));
65   rend2 = PetscMin(rend, A->cmap->N);
66   if (rend2 > rstart) {
67     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
68     PetscCall(PetscLogFlops(rend2 - rstart));
69   }
70   PetscCall(MatDenseRestoreArray(mat->A, &v));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
75 {
76   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
77   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
78 
79   PetscFunctionBegin;
80   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
81   lrow = row - rstart;
82   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
87 {
88   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
89   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
90 
91   PetscFunctionBegin;
92   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
93   lrow = row - rstart;
94   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
/* Return a SEQDENSE matrix that aliases (no copy) the diagonal block of the
   local storage; the result is cached on A under the key "DiagonalBlock". */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B shares the local storage: the diagonal block occupies columns
       [rstart, rstart+m), so its first entry is at offset m*rstart.
       NOTE(review): this offset assumes the local leading dimension equals m;
       confirm behavior when a user has set a larger lda. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* PetscObjectCompose took a reference; drop ours so A owns the cached block */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
132 
/* Insert or add values: locally owned rows go directly into the local
   sequential block; off-process rows are queued in the stash (unless
   donotstash is set) and delivered at assembly time. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n + n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* v is column-major: entry (i,j) lives at v[i + j*m]; insert one at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix as needing assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
165 
166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
167 {
168   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
169   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
170 
171   PetscFunctionBegin;
172   for (i = 0; i < m; i++) {
173     if (idxm[i] < 0) continue; /* negative row */
174     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
175     if (idxm[i] >= rstart && idxm[i] < rend) {
176       row = idxm[i] - rstart;
177       for (j = 0; j < n; j++) {
178         if (idxn[j] < 0) continue; /* negative column */
179         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
180         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
181       }
182     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
188 {
189   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
190 
191   PetscFunctionBegin;
192   PetscCall(MatDenseGetLDA(a->A, lda));
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
/* Set the leading dimension of the local storage, lazily creating the inner
   sequential matrix (A->rmap->n rows by all A->cmap->N columns) if
   preallocation has not happened yet. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    /* the inner type must match the outer (host vs device) variant */
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
222 
223 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
224 {
225   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
226 
227   PetscFunctionBegin;
228   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
229   PetscCall(MatDenseGetArray(a->A, array));
230   PetscFunctionReturn(PETSC_SUCCESS);
231 }
232 
233 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
234 {
235   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
236 
237   PetscFunctionBegin;
238   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
239   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
243 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
244 {
245   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
246 
247   PetscFunctionBegin;
248   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
249   PetscCall(MatDenseGetArrayWrite(a->A, array));
250   PetscFunctionReturn(PETSC_SUCCESS);
251 }
252 
253 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
254 {
255   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
256 
257   PetscFunctionBegin;
258   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
259   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
260   PetscCall(MatDensePlaceArray(a->A, array));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
265 {
266   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
267 
268   PetscFunctionBegin;
269   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
270   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
271   PetscCall(MatDenseResetArray(a->A));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
276 {
277   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
278 
279   PetscFunctionBegin;
280   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
281   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
282   PetscCall(MatDenseReplaceArray(a->A, array));
283   PetscFunctionReturn(PETSC_SUCCESS);
284 }
285 
/* Extract the submatrix A(isrow, iscol).  The column index set is gathered
   globally (every rank keeps all selected columns of its rows); the row
   partitioning of the result must match that of A -- no redistribution. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every rank needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* source column icol[i]; gather the selected (locally owned) rows.
       NOTE(review): bv advances contiguously, so this assumes the
       destination leading dimension equals nrows -- confirm for reused
       matrices with a modified lda. */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
352 
353 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
354 {
355   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
356 
357   PetscFunctionBegin;
358   PetscCall(MatDenseRestoreArray(a->A, array));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
363 {
364   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
365 
366   PetscFunctionBegin;
367   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 
371 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
372 {
373   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
374 
375   PetscFunctionBegin;
376   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
381 {
382   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
383   PetscInt      nstash, reallocs;
384 
385   PetscFunctionBegin;
386   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
387 
388   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
389   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
390   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
391   PetscFunctionReturn(PETSC_SUCCESS);
392 }
393 
/* Drain the stash filled by MatSetValues_MPIDense: receive off-process
   entries, insert them locally in row-sized batches, then assemble the
   local sequential block. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
427 
428 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
429 {
430   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
431 
432   PetscFunctionBegin;
433   PetscCall(MatZeroEntries(l->A));
434   PetscFunctionReturn(PETSC_SUCCESS);
435 }
436 
/* Zero the locally owned rows listed (global indices) in rows[]; optionally
   fix the right-hand side b[r] = diag * x[r] for those rows, and reinsert
   diag on the diagonal when nonzero. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block; the diagonal is handled below because
     MatDiagonalSet is a collective operation on the parallel matrix */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
468 
469 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
471 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
472 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
473 
/* Matrix-vector product restricted to columns [c_start, c_end): scatter the
   needed entries of the distributed xx into the local work vector, then
   delegate to the local block's MatMultColumnRange_C method. */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build lvec + SF */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast handles host and device arrays alike */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
492 
/* yy = A * xx: scatter the distributed xx into the local work vector lvec
   (each rank needs all N entries for its m x N local block), then apply the
   local sequential mult. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build lvec + SF */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast handles host and device arrays alike */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
511 
/* zz = yy + A * xx restricted to columns [c_start, c_end): scatter xx into
   the local work vector, then delegate to the local block's
   MatMultAddColumnRange_C method. */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build lvec + SF */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast handles host and device arrays alike */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
530 
/* zz = yy + A * xx: scatter the distributed xx into the local work vector,
   then apply the local sequential multadd. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build lvec + SF */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast handles host and device arrays alike */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
550 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
551 {
552   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553   const PetscScalar *ax;
554   PetscScalar       *ay;
555   PetscMemType       axmtype, aymtype;
556   PetscInt           r_start, r_end;
557   PetscInt           c_start_local, c_end_local;
558 
559   PetscFunctionBegin;
560   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
561   PetscCall(VecZeroEntries(a->lvec));
562   PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
563   c_start_local = PetscMax(c_start, r_start);
564   c_end_local   = PetscMin(c_end, r_end);
565   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
566   if (c_end_local > c_start_local) {
567     if (PetscMemTypeHost(aymtype)) {
568       PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
569     } else {
570       PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
571       PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
572     }
573   }
574   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
575   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
576   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
577   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
578   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
579   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
580   PetscFunctionReturn(PETSC_SUCCESS);
581 }
582 
/* yy = A^T * xx (or A^H * xx when herm): each rank applies its local block
   to its owned piece of xx, producing a full-length partial result in lvec,
   then the partial results are sum-reduced into yy with the SF. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build lvec + SF */
  PetscCall(VecSet(yy, 0.0)); /* reduction below adds into yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
603 
/* zz = yy + A^H * xx restricted to columns [c_start, c_end): lvec is zeroed
   so entries outside the range contribute nothing to the sum-reduction. */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build lvec + SF */
  PetscCall(VecCopy(yy, zz)); /* reduction below adds the product into zz */
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
624 
/* zz = yy + A^T * xx (or A^H * xx when herm): compute the local partial
   product into lvec, then sum-reduce all partials into zz (seeded with yy). */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build lvec + SF */
  PetscCall(VecCopy(yy, zz)); /* reduction below adds the product into zz */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
645 
646 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
647 {
648   PetscFunctionBegin;
649   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
650   PetscFunctionReturn(PETSC_SUCCESS);
651 }
652 
653 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
654 {
655   PetscFunctionBegin;
656   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
657   PetscFunctionReturn(PETSC_SUCCESS);
658 }
659 
660 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
661 {
662   PetscFunctionBegin;
663   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
664   PetscFunctionReturn(PETSC_SUCCESS);
665 }
666 
667 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
668 {
669   PetscFunctionBegin;
670   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
671   PetscFunctionReturn(PETSC_SUCCESS);
672 }
673 
674 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
675 {
676   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
677   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
678   PetscScalar       *x;
679   const PetscScalar *av;
680 
681   PetscFunctionBegin;
682   PetscCall(VecGetArray(v, &x));
683   PetscCall(VecGetSize(v, &ng));
684   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
685   PetscCall(VecGetLocalSize(v, &nl));
686   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
687   radd = A->rmap->rstart * m;
688   PetscCall(MatDenseGetArrayRead(a->A, &av));
689   PetscCall(MatDenseGetLDA(a->A, &lda));
690   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
691   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
692   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
693   PetscCall(VecRestoreArray(v, &x));
694   PetscFunctionReturn(PETSC_SUCCESS);
695 }
696 
/*
  MatDestroy_MPIDense - Releases all storage owned by a MATMPIDENSE matrix and
  detaches every composed method and object from its PetscObject header.

  Refuses to destroy the matrix while a column vector or submatrix obtained
  from it is still checked out, since that would leave dangling references.
*/
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* Outstanding accessors must be returned before destruction */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Inner sequential matrix, communication scratch objects, and cached column views */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* Detach every method composed by MatCreate_MPIDense (and type converters)
     so the object can be safely re-typed after this call */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA back-end accessors and product hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP back-end accessors and product hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* Drop the reference to the composed diagonal-block object, if any */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
795 
796 #include <petscdraw.h>
/*
  MatView_MPIDense_ASCIIorDraworSocket - Views a MATMPIDENSE matrix on an ASCII,
  draw, or socket viewer.

  For ASCII INFO/INFO_DETAIL formats only per-rank summary information is
  printed.  Otherwise the whole matrix is gathered onto rank 0 (via a temporary
  parallel matrix whose rows all live on rank 0) and viewed there sequentially.
  Collective on the communicator of mat.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Print per-rank local sizes and storage info; no matrix entries */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* Nothing beyond the generic header (printed by the caller) for INFO format */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* A null draw context means drawing is disabled; nothing to do */
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* Temporary parallel matrix with all M rows owned by rank 0 */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* Each rank inserts its locally owned rows; assembly stashes them to rank 0 */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* Only rank 0 actually views its (complete) local block through a subviewer */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
877 
878 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
879 {
880   PetscBool iascii, isbinary, isdraw, issocket;
881 
882   PetscFunctionBegin;
883   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
884   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
885   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
886   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
887 
888   if (iascii || issocket || isdraw) {
889     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
890   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
891   PetscFunctionReturn(PETSC_SUCCESS);
892 }
893 
894 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
895 {
896   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
897   Mat            mdn = mat->A;
898   PetscLogDouble isend[5], irecv[5];
899 
900   PetscFunctionBegin;
901   info->block_size = 1.0;
902 
903   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
904 
905   isend[0] = info->nz_used;
906   isend[1] = info->nz_allocated;
907   isend[2] = info->nz_unneeded;
908   isend[3] = info->memory;
909   isend[4] = info->mallocs;
910   if (flag == MAT_LOCAL) {
911     info->nz_used      = isend[0];
912     info->nz_allocated = isend[1];
913     info->nz_unneeded  = isend[2];
914     info->memory       = isend[3];
915     info->mallocs      = isend[4];
916   } else if (flag == MAT_GLOBAL_MAX) {
917     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
918 
919     info->nz_used      = irecv[0];
920     info->nz_allocated = irecv[1];
921     info->nz_unneeded  = irecv[2];
922     info->memory       = irecv[3];
923     info->mallocs      = irecv[4];
924   } else if (flag == MAT_GLOBAL_SUM) {
925     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
926 
927     info->nz_used      = irecv[0];
928     info->nz_allocated = irecv[1];
929     info->nz_unneeded  = irecv[2];
930     info->memory       = irecv[3];
931     info->mallocs      = irecv[4];
932   }
933   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
934   info->fill_ratio_needed = 0;
935   info->factor_mallocs    = 0;
936   PetscFunctionReturn(PETSC_SUCCESS);
937 }
938 
939 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
940 {
941   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
942 
943   PetscFunctionBegin;
944   switch (op) {
945   case MAT_NEW_NONZERO_LOCATIONS:
946   case MAT_NEW_NONZERO_LOCATION_ERR:
947   case MAT_NEW_NONZERO_ALLOCATION_ERR:
948     MatCheckPreallocated(A, 1);
949     PetscCall(MatSetOption(a->A, op, flg));
950     break;
951   case MAT_ROW_ORIENTED:
952     MatCheckPreallocated(A, 1);
953     a->roworiented = flg;
954     PetscCall(MatSetOption(a->A, op, flg));
955     break;
956   case MAT_IGNORE_OFF_PROC_ENTRIES:
957     a->donotstash = flg;
958     break;
959   case MAT_SYMMETRIC:
960   case MAT_STRUCTURALLY_SYMMETRIC:
961   case MAT_HERMITIAN:
962   case MAT_SYMMETRY_ETERNAL:
963   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
964   case MAT_SPD:
965   case MAT_SPD_ETERNAL:
966     /* if the diagonal matrix is square it inherits some of the properties above */
967     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
968     break;
969   default:
970     break;
971   }
972   PetscFunctionReturn(PETSC_SUCCESS);
973 }
974 
/*
  MatDiagonalScale_MPIDense - Computes A = diag(ll) * A * diag(rr) in place.

  Either vector may be NULL to skip that scaling.  ll is distributed like the
  rows of A; rr is distributed like the columns of A and is first broadcast
  into the local work vector lvec (which holds all entries needed by this
  rank's columns) via the Mvctx star forest.  The local block is stored
  column-major with leading dimension lda.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* Scale row i by l[i]: walk across the row with stride lda (column-major storage) */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* Lazily build the column communication star forest on first use */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    /* Gather the distributed rr entries needed by this rank's columns into lvec */
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* Scale column i by r[i]: columns are contiguous runs of length m starting at i*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1023 
1024 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
1025 {
1026   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1027   PetscInt           i, j;
1028   PetscMPIInt        size;
1029   PetscReal          sum = 0.0;
1030   const PetscScalar *av, *v;
1031 
1032   PetscFunctionBegin;
1033   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1034   v = av;
1035   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1036   if (size == 1) {
1037     PetscCall(MatNorm(mdn->A, type, nrm));
1038   } else {
1039     if (type == NORM_FROBENIUS) {
1040       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1041         sum += PetscRealPart(PetscConj(*v) * (*v));
1042         v++;
1043       }
1044       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1045       *nrm = PetscSqrtReal(*nrm);
1046       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1047     } else if (type == NORM_1) {
1048       PetscReal *tmp;
1049 
1050       PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1051       *nrm = 0.0;
1052       v    = av;
1053       for (j = 0; j < mdn->A->cmap->n; j++) {
1054         for (i = 0; i < mdn->A->rmap->n; i++) {
1055           tmp[j] += PetscAbsScalar(*v);
1056           v++;
1057         }
1058       }
1059       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1060       for (j = 0; j < A->cmap->N; j++) {
1061         if (tmp[j] > *nrm) *nrm = tmp[j];
1062       }
1063       PetscCall(PetscFree(tmp));
1064       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1065     } else if (type == NORM_INFINITY) { /* max row norm */
1066       PetscCall(MatNorm(mdn->A, type, nrm));
1067       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1068     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1069   }
1070   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1071   PetscFunctionReturn(PETSC_SUCCESS);
1072 }
1073 
1074 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1075 {
1076   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1077   Mat           B;
1078   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1079   PetscInt      j, i, lda;
1080   PetscScalar  *v;
1081 
1082   PetscFunctionBegin;
1083   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1084   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1085     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1086     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1087     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1088     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1089   } else B = *matout;
1090 
1091   m = a->A->rmap->n;
1092   n = a->A->cmap->n;
1093   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
1094   PetscCall(MatDenseGetLDA(a->A, &lda));
1095   PetscCall(PetscMalloc1(m, &rwork));
1096   for (i = 0; i < m; i++) rwork[i] = rstart + i;
1097   for (j = 0; j < n; j++) {
1098     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
1099     v = PetscSafePointerPlusOffset(v, lda);
1100   }
1101   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
1102   PetscCall(PetscFree(rwork));
1103   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1104   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1105   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1106     *matout = B;
1107   } else {
1108     PetscCall(MatHeaderMerge(A, &B));
1109   }
1110   PetscFunctionReturn(PETSC_SUCCESS);
1111 }
1112 
1113 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1114 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1115 
1116 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1117 {
1118   PetscFunctionBegin;
1119   PetscCall(PetscLayoutSetUp(A->rmap));
1120   PetscCall(PetscLayoutSetUp(A->cmap));
1121   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1126 {
1127   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1128 
1129   PetscFunctionBegin;
1130   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1135 {
1136   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1137 
1138   PetscFunctionBegin;
1139   PetscCall(MatConjugate(a->A));
1140   PetscFunctionReturn(PETSC_SUCCESS);
1141 }
1142 
1143 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1144 {
1145   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1146 
1147   PetscFunctionBegin;
1148   PetscCall(MatRealPart(a->A));
1149   PetscFunctionReturn(PETSC_SUCCESS);
1150 }
1151 
1152 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1153 {
1154   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1155 
1156   PetscFunctionBegin;
1157   PetscCall(MatImaginaryPart(a->A));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1162 {
1163   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1167   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1168   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1173 
1174 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1175 {
1176   PetscInt      i, m, n;
1177   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatGetSize(A, &m, &n));
1181   if (type == REDUCTION_MEAN_REALPART) {
1182     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
1183   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1184     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
1185   } else {
1186     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
1187   }
1188   if (type == NORM_2) {
1189     for (i = 0; i < n; i++) reductions[i] *= reductions[i];
1190   }
1191   if (type == NORM_INFINITY) {
1192     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1193   } else {
1194     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1195   }
1196   if (type == NORM_2) {
1197     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1198   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1199     for (i = 0; i < n; i++) reductions[i] /= m;
1200   }
1201   PetscFunctionReturn(PETSC_SUCCESS);
1202 }
1203 
1204 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1205 {
1206   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(MatSetRandom(d->A, rctx));
1210 #if defined(PETSC_HAVE_DEVICE)
1211   x->offloadmask = d->A->offloadmask;
1212 #endif
1213   PetscFunctionReturn(PETSC_SUCCESS);
1214 }
1215 
/*
  MatMissingDiagonal_MPIDense - A dense matrix stores every entry explicitly,
  so a diagonal entry can never be missing; *missing is always PETSC_FALSE
  and d is left untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1222 
1223 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1224 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1225 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1226 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1227 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1228 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1229 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1230 
/*
  Function table for MATMPIDENSE.  Entries are positional (the /\*NN*\/ markers
  give the slot index in struct _MatOps); NULL slots fall back to the generic
  MatXXX() behavior or raise "not supported".  Installed into mat->ops by the
  type constructor.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};
1388 
/* Preallocate the per-rank storage of an MPI dense matrix.

   Sets up the row/column layouts and lazily creates the inner sequential
   matrix a->A of size rmap->n x cmap->N (each rank stores all global
   columns of its local rows), selecting a GPU-backed sequential type when
   the outer matrix is the CUDA/HIP flavor.  `data` is an optional
   user-provided column-major array forwarded to
   MatSeqDenseSetPreallocation(); pass NULL to let PETSc allocate. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE; /* default inner (sequential) type */

  PetscFunctionBegin;
  /* cannot (re)preallocate while a submatrix view is outstanding */
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local rows by ALL global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* outer matrix mirrors the inner matrix's host/device residency */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1421 
1422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1423 {
1424   Mat B, C;
1425 
1426   PetscFunctionBegin;
1427   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1428   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1429   PetscCall(MatDestroy(&C));
1430   if (reuse == MAT_REUSE_MATRIX) {
1431     C = *newmat;
1432   } else C = NULL;
1433   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1434   PetscCall(MatDestroy(&B));
1435   if (reuse == MAT_INPLACE_MATRIX) {
1436     PetscCall(MatHeaderReplace(A, &C));
1437   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1438   PetscFunctionReturn(PETSC_SUCCESS);
1439 }
1440 
1441 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1442 {
1443   Mat B, C;
1444 
1445   PetscFunctionBegin;
1446   PetscCall(MatDenseGetLocalMatrix(A, &C));
1447   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1448   if (reuse == MAT_REUSE_MATRIX) {
1449     C = *newmat;
1450   } else C = NULL;
1451   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1452   PetscCall(MatDestroy(&B));
1453   if (reuse == MAT_INPLACE_MATRIX) {
1454     PetscCall(MatHeaderReplace(A, &C));
1455   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1456   PetscFunctionReturn(PETSC_SUCCESS);
1457 }
1458 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert an MPIDENSE matrix to Elemental format by inserting each
   rank's local rows (against all global columns) with ADD_VALUES. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* values are accumulated (ADD_VALUES) below, so start from zero */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); /* values below are supplied column-major */
  }

  /* global indices of this rank's rows, and of all columns */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); /* contiguous storage: one call suffices */
  else {
    /* leading dimension exceeds the row count: insert column by column */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1503 
1504 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1505 {
1506   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1507 
1508   PetscFunctionBegin;
1509   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1510   PetscFunctionReturn(PETSC_SUCCESS);
1511 }
1512 
1513 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1514 {
1515   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1516 
1517   PetscFunctionBegin;
1518   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1519   PetscFunctionReturn(PETSC_SUCCESS);
1520 }
1521 
1522 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1523 {
1524   Mat_MPIDense *mat;
1525   PetscInt      m, nloc, N;
1526 
1527   PetscFunctionBegin;
1528   PetscCall(MatGetSize(inmat, &m, &N));
1529   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1530   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1531     PetscInt sum;
1532 
1533     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1534     /* Check sum(n) = N */
1535     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1536     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1537 
1538     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1539     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1540   }
1541 
1542   /* numeric phase */
1543   mat = (Mat_MPIDense *)(*outmat)->data;
1544   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1545   PetscFunctionReturn(PETSC_SUCCESS);
1546 }
1547 
/* Return a Vec view of column `col` of the matrix (read/write access).
   No copy is made: the column's storage is placed into the cached vector
   a->cvec with VecPlaceArray().  Only one column vector may be
   outstanding at a time, tracked by a->vecinuse. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); /* lazily create the reusable column vector */
  a->vecinuse = col + 1; /* store col+1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1564 
1565 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1566 {
1567   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1568 
1569   PetscFunctionBegin;
1570   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1571   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1572   VecCheckAssembled(a->cvec);
1573   a->vecinuse = 0;
1574   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1575   PetscCall(VecResetArray(a->cvec));
1576   if (v) *v = NULL;
1577   PetscFunctionReturn(PETSC_SUCCESS);
1578 }
1579 
1580 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1581 {
1582   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1583   PetscInt      lda;
1584 
1585   PetscFunctionBegin;
1586   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1587   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1588   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1589   a->vecinuse = col + 1;
1590   PetscCall(MatDenseGetLDA(a->A, &lda));
1591   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1592   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1593   PetscCall(VecLockReadPush(a->cvec));
1594   *v = a->cvec;
1595   PetscFunctionReturn(PETSC_SUCCESS);
1596 }
1597 
1598 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1599 {
1600   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1601 
1602   PetscFunctionBegin;
1603   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1604   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1605   VecCheckAssembled(a->cvec);
1606   a->vecinuse = 0;
1607   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1608   PetscCall(VecLockReadPop(a->cvec));
1609   PetscCall(VecResetArray(a->cvec));
1610   if (v) *v = NULL;
1611   PetscFunctionReturn(PETSC_SUCCESS);
1612 }
1613 
1614 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1615 {
1616   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1617   PetscInt      lda;
1618 
1619   PetscFunctionBegin;
1620   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1621   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1622   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1623   a->vecinuse = col + 1;
1624   PetscCall(MatDenseGetLDA(a->A, &lda));
1625   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1626   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1627   *v = a->cvec;
1628   PetscFunctionReturn(PETSC_SUCCESS);
1629 }
1630 
1631 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1632 {
1633   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1634 
1635   PetscFunctionBegin;
1636   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1637   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1638   VecCheckAssembled(a->cvec);
1639   a->vecinuse = 0;
1640   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1641   PetscCall(VecResetArray(a->cvec));
1642   if (v) *v = NULL;
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
/* Return a matrix view of the (rbegin:rend, cbegin:cend) block of A.
   The view is cached in a->cmat and reused across calls; its layouts are
   rebuilt only when the requested window changes.  The inner sequential
   part of the view is obtained from a->A via MatDenseGetSubMatrix();
   MatDenseRestoreSubMatrix() must be called before requesting another
   view (tracked by a->matinuse). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend; /* requested window clipped to this rank's ownership range */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* intersect [rbegin,rend) and [cbegin,cend) with the local ownership ranges */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached view matrix and set its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); /* full row range: share A's layout */
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap)); /* full column range: share A's layout */
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    /* remember which window the cached view currently represents */
    c             = (Mat_MPIDense *)a->cmat->data;
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    /* row window changed: rebuild the row layout of the cached view */
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      /* rebuild the column layout and invalidate the cached mat-vec
         scatter (lvec/Mvctx), which depends on the column layout */
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* local row bounds but GLOBAL column bounds: each rank stores all
     columns of its local rows */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero marks the view as outstanding */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1718 
/* Release the matrix view obtained with MatDenseGetSubMatrix_MPIDense():
   restore the inner sequential submatrix and clear the in-use flag. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* writing through the view may have changed where the data is valid */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1737 
1738 /*MC
1739    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1740 
1741    Options Database Key:
1742 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1743 
1744   Level: beginner
1745 
1746 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1747 M*/
/* MatCreate_MPIDense - Type constructor for MATMPIDENSE.

   Allocates the Mat_MPIDense data structure, installs the function
   table, creates the stash used for off-process MatSetValues(), and
   composes the type-specific method implementations (dense-array access,
   column vectors, submatrix views, conversions, and matrix products). */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MATMPIDENSE function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column-vector/submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions (availability depends on build configuration) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product implementations */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1820 
1821 /*MC
1822    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1823 
1824    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1825    and `MATMPIDENSE` otherwise.
1826 
1827    Options Database Key:
1828 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1829 
1830   Level: beginner
1831 
1832 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1833 M*/
1834 
1835 /*@
1836   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1837 
1838   Collective
1839 
1840   Input Parameters:
1841 + B    - the matrix
1842 - data - optional location of matrix data.  Set to `NULL` for PETSc
1843          to control all matrix memory allocation.
1844 
1845   Level: intermediate
1846 
1847   Notes:
1848   The dense format is fully compatible with standard Fortran
1849   storage by columns.
1850 
1851   The data input variable is intended primarily for Fortran programmers
1852   who wish to allocate their own matrix memory space.  Most users should
1853   set `data` to `NULL`.
1854 
1855 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1856 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently does nothing if B's type does not compose
     the method (e.g. for a sequential dense matrix) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1864 
1865 /*@
1866   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1867   array provided by the user. This is useful to avoid copying an array
1868   into a matrix
1869 
1870   Not Collective
1871 
1872   Input Parameters:
1873 + mat   - the matrix
1874 - array - the array in column major order
1875 
1876   Level: developer
1877 
1878   Note:
1879   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1880 
1881   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1882   freed when the matrix is destroyed.
1883 
1884 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1885           `MatDenseReplaceArray()`
1886 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not support this operation */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* contents have (potentially) changed */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-provided array lives on the host */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1898 
1899 /*@
  MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
1901 
1902   Not Collective
1903 
1904   Input Parameter:
1905 . mat - the matrix
1906 
1907   Level: developer
1908 
1909   Note:
1910   You can only call this after a call to `MatDensePlaceArray()`
1911 
1912 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1913 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not support this operation */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* contents reverted to the original array */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1922 
1923 /*@
1924   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1925   array provided by the user. This is useful to avoid copying an array
1926   into a matrix
1927 
1928   Not Collective
1929 
1930   Input Parameters:
1931 + mat   - the matrix
1932 - array - the array in column major order
1933 
1934   Level: developer
1935 
1936   Note:
1937   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1938 
1939   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1940   freed by the user. It will be freed when the matrix is destroyed.
1941 
1942 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1943 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not support this operation */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* contents have (potentially) changed */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array lives on the host */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1955 
1956 /*@
1957   MatCreateDense - Creates a matrix in `MATDENSE` format.
1958 
1959   Collective
1960 
1961   Input Parameters:
1962 + comm - MPI communicator
1963 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1964 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1965 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1966 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1967 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1968    to control all matrix memory allocation.
1969 
1970   Output Parameter:
1971 . A - the matrix
1972 
1973   Level: intermediate
1974 
1975   Notes:
1976   The dense format is fully compatible with standard Fortran
1977   storage by columns.
1978 
1979   Although local portions of the matrix are stored in column-major
1980   order, the matrix is partitioned across MPI ranks by row.
1981 
1982   The data input variable is intended primarily for Fortran programmers
1983   who wish to allocate their own matrix memory space.  Most users should
1984   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1985 
1986   The user MUST specify either the local or global matrix dimensions
1987   (possibly both).
1988 
1989 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1990 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation variants are invoked; each is try-method based
     (see MatMPIDenseSetPreallocation()), so the call that does not match
     the resolved type (seq vs. mpi) is presumably a no-op — confirm for
     the sequential variant, which is defined elsewhere */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2001 
2002 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
2003 {
2004   Mat           mat;
2005   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
2006 
2007   PetscFunctionBegin;
2008   *newmat = NULL;
2009   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
2010   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2011   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
2012   a = (Mat_MPIDense *)mat->data;
2013 
2014   mat->factortype   = A->factortype;
2015   mat->assembled    = PETSC_TRUE;
2016   mat->preallocated = PETSC_TRUE;
2017 
2018   mat->insertmode = NOT_SET_VALUES;
2019   a->donotstash   = oldmat->donotstash;
2020 
2021   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2022   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2023 
2024   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2025 
2026   *newmat = mat;
2027   PetscFunctionReturn(PETSC_SUCCESS);
2028 }
2029 
/* Load the matrix from a viewer.  Binary viewers are always supported;
   HDF5 viewers only when PETSc was configured with HDF5. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2055 
2056 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2057 {
2058   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2059   Mat           a, b;
2060 
2061   PetscFunctionBegin;
2062   a = matA->A;
2063   b = matB->A;
2064   PetscCall(MatEqual(a, b, flag));
2065   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2066   PetscFunctionReturn(PETSC_SUCCESS);
2067 }
2068 
2069 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2070 {
2071   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2072 
2073   PetscFunctionBegin;
2074   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2075   PetscCall(MatDestroy(&atb->atb));
2076   PetscCall(PetscFree(atb));
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2081 {
2082   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2083 
2084   PetscFunctionBegin;
2085   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2086   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2087   PetscCall(PetscFree(abt));
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the local
   product aseq^T * bseq, then the full-size partial results are summed and
   distributed to match C's row ownership (reduce-to-rank-0 fast path, or a
   general MPI_Reduce_scatter). */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq; reuse the cached intermediate on repeat calls */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL;
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL;
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    if (rank == 0) {
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      /* rank 0 is the reduction root; its destination must start from zero */
      PetscCall(MatZeroEntries(c_local));
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
     * B->cmap->N: use the a->Mvctx to use the best reduction method */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    /* when there are multiple columns, reduce whole rows at once via a contiguous datatype */
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      PetscMPIInt mpi_N;

      PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
      PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    /* if we reduced into a temporary (lda mismatch), copy the result back into C's block */
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* arrange atbarray into sendbuf: for each destination rank, pack its slice of
     every column contiguously so MPI_Reduce_scatter can split the buffer by rank */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2195 
2196 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2197 {
2198   MPI_Comm               comm;
2199   PetscMPIInt            size;
2200   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2201   Mat_TransMatMultDense *atb;
2202   PetscBool              cisdense = PETSC_FALSE;
2203   const PetscInt        *ranges;
2204 
2205   PetscFunctionBegin;
2206   MatCheckProduct(C, 4);
2207   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2208   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2209   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2210     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2211   }
2212 
2213   /* create matrix product C */
2214   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2215 #if defined(PETSC_HAVE_CUDA)
2216   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2217 #endif
2218 #if defined(PETSC_HAVE_HIP)
2219   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2220 #endif
2221   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2222   PetscCall(MatSetUp(C));
2223 
2224   /* create data structure for reuse C */
2225   PetscCallMPI(MPI_Comm_size(comm, &size));
2226   PetscCall(PetscNew(&atb));
2227   cM = C->rmap->N;
2228   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2229   PetscCall(MatGetOwnershipRanges(C, &ranges));
2230   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2231   C->product->data    = atb;
2232   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2233   PetscFunctionReturn(PETSC_SUCCESS);
2234 }
2235 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sizes C and allocates the
   communication buffers for the algorithm chosen via the product options
   ("allgatherv", the default, or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else ("cyclic") = 1 */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag for the cyclic algorithm's point-to-point messages */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers each large enough for the biggest rank's slice of B^T */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local transposed slice, buf[1] the fully gathered B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2287 
/* Numeric phase of C = A * B^T using a ring pipeline: each rank's slice of B
   circulates around the communicator with nonblocking sends/receives while the
   local GEMM against the current slice overlaps the communication. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local blocks, narrowed for the BLAS call */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B block is already contiguous; send it in place */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact the local B block (strip the lda padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send "left", receive from "right" */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2371 
2372 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2373 {
2374   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2375   Mat_MatTransMultDense *abt;
2376   MPI_Comm               comm;
2377   PetscMPIInt            size, ibn;
2378   PetscScalar           *cv, *sendbuf, *recvbuf;
2379   const PetscScalar     *av, *bv;
2380   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2381   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2382   PetscBLASInt           cm, cn, ck, alda, clda;
2383 
2384   PetscFunctionBegin;
2385   MatCheckProduct(C, 3);
2386   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2387   abt = (Mat_MatTransMultDense *)C->product->data;
2388   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2389   PetscCallMPI(MPI_Comm_size(comm, &size));
2390   PetscCall(MatDenseGetArrayRead(a->A, &av));
2391   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2392   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2393   PetscCall(MatDenseGetLDA(a->A, &i));
2394   PetscCall(PetscBLASIntCast(i, &alda));
2395   PetscCall(MatDenseGetLDA(b->A, &blda));
2396   PetscCall(MatDenseGetLDA(c->A, &i));
2397   PetscCall(PetscBLASIntCast(i, &clda));
2398   /* copy transpose of B into buf[0] */
2399   bn      = B->rmap->n;
2400   sendbuf = abt->buf[0];
2401   recvbuf = abt->buf[1];
2402   for (k = 0, j = 0; j < bn; j++) {
2403     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2404   }
2405   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2406   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2407   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2408   PetscCall(PetscBLASIntCast(cK, &ck));
2409   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2410   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2411   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2412   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2413   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2414   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2415   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2416   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2417   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2418   PetscFunctionReturn(PETSC_SUCCESS);
2419 }
2420 
2421 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2422 {
2423   Mat_MatTransMultDense *abt;
2424 
2425   PetscFunctionBegin;
2426   MatCheckProduct(C, 3);
2427   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2428   abt = (Mat_MatTransMultDense *)C->product->data;
2429   switch (abt->alg) {
2430   case 1:
2431     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2432     break;
2433   default:
2434     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2435     break;
2436   }
2437   PetscFunctionReturn(PETSC_SUCCESS);
2438 }
2439 
2440 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2441 {
2442   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2443 
2444   PetscFunctionBegin;
2445   PetscCall(MatDestroy(&ab->Ce));
2446   PetscCall(MatDestroy(&ab->Ae));
2447   PetscCall(MatDestroy(&ab->Be));
2448   PetscCall(PetscFree(ab));
2449   PetscFunctionReturn(PETSC_SUCCESS);
2450 }
2451 
/* Numeric phase of C = A * B for MPI dense matrices. Either delegates to Elemental
   (when the symbolic phase created Ae/Ce), or gathers the needed rows of B on each
   rank (broadcast fast path when rank 0 owns all of B, or per-column SF broadcast)
   and performs the local sequential product. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
  Mat_MPIDense     *b   = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* Elemental path: refresh the Elemental mirrors of A and B, multiply, convert back */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    MPI_Comm           comm;
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;
    const PetscInt    *ranges;
    PetscMPIInt        size;

    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    comm = PetscObjectComm((PetscObject)B);
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      PetscInt           b_lda, be_lda;
      Mat                b_local  = b->A;
      Mat                b_alloc  = NULL;
      Mat                be_local = ab->Be;
      Mat                be_alloc = NULL;
      PetscMemType       b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype       vector_type;
      PetscScalar       *be_array = NULL;
      PetscMPIInt        rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      /* the SF broadcast assumes contiguous storage (lda == number of rows);
         fall back to temporaries when the lda carries padding */
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      if (rank == 0) {
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      /* when there are multiple columns, broadcast whole rows via a contiguous datatype */
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      /* copy back into the workspace matrix if a temporary was used */
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      /* general case: broadcast B into the workspace Be one column at a time */
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    /* local product: C_local = A_local * Be */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2544 
2545 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2546 {
2547   Mat_Product      *product = C->product;
2548   PetscInt          alg;
2549   Mat_MatMultDense *ab;
2550   PetscBool         flg;
2551 
2552   PetscFunctionBegin;
2553   MatCheckProduct(C, 4);
2554   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2555   /* check local size of A and B */
2556   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2557              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2558 
2559   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2560   alg = flg ? 0 : 1;
2561 
2562   /* setup C */
2563   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2564   PetscCall(MatSetType(C, MATMPIDENSE));
2565   PetscCall(MatSetUp(C));
2566 
2567   /* create data structure for reuse Cdense */
2568   PetscCall(PetscNew(&ab));
2569 
2570   switch (alg) {
2571   case 1: /* alg: "elemental" */
2572 #if PetscDefined(HAVE_ELEMENTAL)
2573     /* create elemental matrices Ae and Be */
2574     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2575     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2576     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2577     PetscCall(MatSetUp(ab->Ae));
2578     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2579 
2580     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2581     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2582     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2583     PetscCall(MatSetUp(ab->Be));
2584     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2585 
2586     /* compute symbolic Ce = Ae*Be */
2587     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2588     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2589 #else
2590     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2591 #endif
2592     break;
2593   default: /* alg: "petsc" */
2594     ab->Ae = NULL;
2595     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2596     ab->Ce = NULL;
2597     break;
2598   }
2599 
2600   C->product->data       = ab;
2601   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2602   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2603   PetscFunctionReturn(PETSC_SUCCESS);
2604 }
2605 
/* Configure the A*B product: pick the algorithm ("petsc" or, when available,
   "elemental") from defaults and runtime options, then install the symbolic hooks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is PETSc */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2629 
2630 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2631 {
2632   Mat_Product *product = C->product;
2633   Mat          A = product->A, B = product->B;
2634 
2635   PetscFunctionBegin;
2636   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2637              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2638   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2639   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2640   PetscFunctionReturn(PETSC_SUCCESS);
2641 }
2642 
/* Configure the A*B^T product: pick "allgatherv" (default) or "cyclic" from defaults
   and runtime options (legacy or new option name), then install the symbolic hooks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    /* legacy MatMatTransposeMult() entry point: keep its historical option name */
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2672 
2673 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2674 {
2675   Mat_Product *product = C->product;
2676 
2677   PetscFunctionBegin;
2678   switch (product->type) {
2679   case MATPRODUCT_AB:
2680     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2681     break;
2682   case MATPRODUCT_AtB:
2683     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2684     break;
2685   case MATPRODUCT_ABt:
2686     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2687     break;
2688   default:
2689     break;
2690   }
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693