xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 57d508425293f0bb93f59574d14951d8faac9af8)
/*
   Basic functions for parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>
#include <petsc/private/vecimpl.h>
#include <petscdevice.h>
#include <petsc/private/deviceimpl.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` matrix, returns the sequential matrix
  that represents the locally owned rows of the operator; for a `MATSEQDENSE` matrix,
  returns the matrix itself.

  Not Collective

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

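  Example Usage:
  A minimal sketch (the global sizes `M` and `N` are placeholders and error checking is omitted):
.vb
  Mat A, lA;

  MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A);
  /* ... set values in A and assemble it ... */
  MatDenseGetLocalMatrix(A, &lA);
  /* lA is the MATSEQDENSE matrix holding this process's rows of A */
.ve
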
.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
      PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported: the submatrix must use the same
     row partitioning as the original matrix. Each index set should really be checked to
     confirm that it satisfies this restriction; a conforming pair is sketched below. */
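  /* A sketch of a conforming call (the names m, rstart, isrow, iscol, and S are illustrative):
     each process requests exactly the rows it already owns,

       MatGetLocalSize(A, &m, NULL);
       MatGetOwnershipRange(A, &rstart, NULL);
       ISCreateStride(PetscObjectComm((PetscObject)A), m, rstart, 1, &isrow);
       MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &S);

     while iscol may list any global columns, each process providing its local share. */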

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

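/*
   The MatMult variants below share one structure: a PetscSF broadcast gathers the distributed
   input vector into the length-N work vector lvec, and the local MATSEQDENSE kernel then
   computes this process's rows of the result. The (Hermitian) transpose variants reverse the
   order: the local kernel runs first, and a PetscSF reduction with MPIU_SUM accumulates the
   partial products into the distributed output vector.
*/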
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecZeroEntries(a->lvec));
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  c_start_local = PetscMax(c_start, r_start);
  c_end_local   = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    /* ay is indexed relative to this process's ownership range, so shift the global column indices by r_start */
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local - r_start], (size_t)(c_end_local - c_start_local)));
    } else {
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local - r_start], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len = PetscMin(a->A->rmap->n, a->A->cmap->n);
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  radd = A->rmap->rstart * lda; /* offset of column rstart in the column-major local array, where the first local diagonal entry lives */
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (isascii || issocket || isdraw) PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j, lda;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (j = 0; j < mdn->A->cmap->n; j++) {
        v = av + j * lda; /* walk column by column since the leading dimension may exceed the local row count */
        for (i = 0; i < mdn->A->rmap->n; i++) {
          sum += PetscRealPart(PetscConj(*v) * (*v));
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp;

      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        v = av + j * lda;
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for the two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

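/* The operation table for MATMPIDENSE; the numbered comments give the slot indices in struct _MatOps (see petsc/private/matimpl.h) */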
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
1298                                        /* 69*/ NULL,
1299                                        NULL,
1300                                        NULL,
1301                                        NULL,
1302                                        NULL,
1303                                        /* 74*/ NULL,
1304                                        NULL,
1305                                        NULL,
1306                                        NULL,
1307                                        MatLoad_MPIDense,
1308                                        /* 79*/ NULL,
1309                                        NULL,
1310                                        NULL,
1311                                        NULL,
1312                                        /* 83*/ NULL,
1313                                        NULL,
1314                                        NULL,
1315                                        NULL,
1316                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1317                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1318                                        /* 89*/ NULL,
1319                                        MatProductSetFromOptions_MPIDense,
1320                                        NULL,
1321                                        NULL,
1322                                        MatConjugate_MPIDense,
1323                                        /* 94*/ NULL,
1324                                        NULL,
1325                                        MatRealPart_MPIDense,
1326                                        MatImaginaryPart_MPIDense,
1327                                        NULL,
1328                                        /*99*/ NULL,
1329                                        NULL,
1330                                        NULL,
1331                                        NULL,
1332                                        MatGetColumnVector_MPIDense,
1333                                        /*104*/ MatMissingDiagonal_MPIDense,
1334                                        NULL,
1335                                        NULL,
1336                                        NULL,
1337                                        NULL,
1338                                        /*109*/ NULL,
1339                                        NULL,
1340                                        NULL,
1341                                        MatMultHermitianTranspose_MPIDense,
1342                                        MatMultHermitianTransposeAdd_MPIDense,
1343                                        /*114*/ NULL,
1344                                        NULL,
1345                                        MatGetColumnReductions_MPIDense,
1346                                        NULL,
1347                                        NULL,
1348                                        NULL,
1349                                        /*120*/ NULL,
1350                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1351                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1352                                        NULL,
1353                                        /*124*/ NULL,
1354                                        NULL,
1355                                        NULL,
1356                                        NULL,
1357                                        NULL,
1358                                        /*129*/ NULL,
1359                                        NULL,
1360                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1361                                        NULL,
1362                                        NULL,
1363                                        /*134*/ NULL,
1364                                        NULL,
1365                                        NULL,
1366                                        NULL,
1367                                        NULL,
1368                                        /*139*/ NULL,
1369                                        NULL,
1370                                        NULL,
1371                                        NULL,
1372                                        NULL,
1373                                        NULL};
1374 
1375 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1376 {
1377   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1378   MatType       mtype = MATSEQDENSE;
1379 
1380   PetscFunctionBegin;
1381   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1382   PetscCall(PetscLayoutSetUp(mat->rmap));
1383   PetscCall(PetscLayoutSetUp(mat->cmap));
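  /* the inner sequential matrix stores all N global columns for this rank's local rows */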
1384   if (!a->A) {
1385     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1386     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1387   }
1388 #if defined(PETSC_HAVE_CUDA)
1389   PetscBool iscuda;
1390   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1391   if (iscuda) mtype = MATSEQDENSECUDA;
1392 #endif
1393 #if defined(PETSC_HAVE_HIP)
1394   PetscBool iship;
1395   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1396   if (iship) mtype = MATSEQDENSEHIP;
1397 #endif
1398   PetscCall(MatSetType(a->A, mtype));
1399   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1400 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1401   mat->offloadmask = a->A->offloadmask;
1402 #endif
1403   mat->preallocated = PETSC_TRUE;
1404   mat->assembled    = PETSC_TRUE;
1405   PetscFunctionReturn(PETSC_SUCCESS);
1406 }
1407 
1408 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1409 {
1410   Mat B, C;
1411 
1412   PetscFunctionBegin;
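  /* convert the local rows of the AIJ matrix to a sequential dense matrix, then
     concatenate the per-rank pieces into the parallel dense result */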
1413   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1414   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1415   PetscCall(MatDestroy(&C));
1416   if (reuse == MAT_REUSE_MATRIX) {
1417     C = *newmat;
1418   } else C = NULL;
1419   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1420   PetscCall(MatDestroy(&B));
1421   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
1422   else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1423   PetscFunctionReturn(PETSC_SUCCESS);
1424 }
1425 
1426 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1427 {
1428   Mat B, C;
1429 
1430   PetscFunctionBegin;
1431   PetscCall(MatDenseGetLocalMatrix(A, &C));
1432   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1433   if (reuse == MAT_REUSE_MATRIX) {
1434     C = *newmat;
1435   } else C = NULL;
1436   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1437   PetscCall(MatDestroy(&B));
1438   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
1439   else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1440   PetscFunctionReturn(PETSC_SUCCESS);
1441 }
1442 
1443 #if defined(PETSC_HAVE_ELEMENTAL)
1444 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1445 {
1446   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1447   Mat           mat_elemental;
1448   PetscScalar  *v;
1449   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1450 
1451   PetscFunctionBegin;
1452   if (reuse == MAT_REUSE_MATRIX) {
1453     mat_elemental = *newmat;
1454     PetscCall(MatZeroEntries(*newmat));
1455   } else {
1456     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1457     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1458     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1459     PetscCall(MatSetUp(mat_elemental));
1460     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1461   }
1462 
1463   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1464   for (i = 0; i < N; i++) cols[i] = i;
1465   for (i = 0; i < m; i++) rows[i] = rstart + i;
1466 
1467   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1468   PetscCall(MatDenseGetArray(A, &v));
1469   PetscCall(MatDenseGetLDA(a->A, &lda));
1470   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1471   else {
1472     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1473   }
1474   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1475   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1476   PetscCall(MatDenseRestoreArray(A, &v));
1477   PetscCall(PetscFree2(rows, cols));
1478 
1479   if (reuse == MAT_INPLACE_MATRIX) {
1480     PetscCall(MatHeaderReplace(A, &mat_elemental));
1481   } else {
1482     *newmat = mat_elemental;
1483   }
1484   PetscFunctionReturn(PETSC_SUCCESS);
1485 }
1486 #endif
1487 
1488 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1489 {
1490   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1491 
1492   PetscFunctionBegin;
1493   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1498 {
1499   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1500 
1501   PetscFunctionBegin;
1502   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1503   PetscFunctionReturn(PETSC_SUCCESS);
1504 }
1505 
1506 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1507 {
1508   Mat_MPIDense *mat;
1509   PetscInt      m, nloc, N;
1510 
1511   PetscFunctionBegin;
1512   PetscCall(MatGetSize(inmat, &m, &N));
1513   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1514   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1515     PetscInt sum;
1516 
1517     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1518     /* Check sum(n) = N */
1519     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1520     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1521 
1522     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1523     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1524   }
1525 
1526   /* numeric phase */
1527   mat = (Mat_MPIDense *)(*outmat)->data;
1528   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1529   PetscFunctionReturn(PETSC_SUCCESS);
1530 }
1531 
1532 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1533 {
1534   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1535   PetscInt      lda;
1536 
1537   PetscFunctionBegin;
1538   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1539   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1540   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1541   a->vecinuse = col + 1;
1542   PetscCall(MatDenseGetLDA(a->A, &lda));
1543   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1544   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1545   *v = a->cvec;
1546   PetscFunctionReturn(PETSC_SUCCESS);
1547 }
1548 
1549 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1550 {
1551   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1552 
1553   PetscFunctionBegin;
1554   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1555   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1556   VecCheckAssembled(a->cvec);
1557   a->vecinuse = 0;
1558   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1559   PetscCall(VecResetArray(a->cvec));
1560   if (v) *v = NULL;
1561   PetscFunctionReturn(PETSC_SUCCESS);
1562 }
1563 
1564 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1565 {
1566   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1567   PetscInt      lda;
1568 
1569   PetscFunctionBegin;
1570   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1571   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1572   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1573   a->vecinuse = col + 1;
1574   PetscCall(MatDenseGetLDA(a->A, &lda));
1575   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1576   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1577   PetscCall(VecLockReadPush(a->cvec));
1578   *v = a->cvec;
1579   PetscFunctionReturn(PETSC_SUCCESS);
1580 }
1581 
1582 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1583 {
1584   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1585 
1586   PetscFunctionBegin;
1587   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1588   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1589   VecCheckAssembled(a->cvec);
1590   a->vecinuse = 0;
1591   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1592   PetscCall(VecLockReadPop(a->cvec));
1593   PetscCall(VecResetArray(a->cvec));
1594   if (v) *v = NULL;
1595   PetscFunctionReturn(PETSC_SUCCESS);
1596 }
1597 
1598 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1599 {
1600   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1601   PetscInt      lda;
1602 
1603   PetscFunctionBegin;
1604   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1605   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1606   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1607   a->vecinuse = col + 1;
1608   PetscCall(MatDenseGetLDA(a->A, &lda));
1609   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1610   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1611   *v = a->cvec;
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1616 {
1617   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1618 
1619   PetscFunctionBegin;
1620   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1621   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1622   VecCheckAssembled(a->cvec);
1623   a->vecinuse = 0;
1624   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1625   PetscCall(VecResetArray(a->cvec));
1626   if (v) *v = NULL;
1627   PetscFunctionReturn(PETSC_SUCCESS);
1628 }
1629 
1630 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1631 {
1632   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1633   Mat_MPIDense *c;
1634   MPI_Comm      comm;
1635   PetscInt      prbegin, prend, pcbegin, pcend;
1636 
1637   PetscFunctionBegin;
1638   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1639   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1640   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
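  /* clamp the requested global row/column block to this rank's ownership range */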
1641   prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1642   prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1643   pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
1644   pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
1645   if (!a->cmat) {
1646     PetscCall(MatCreate(comm, &a->cmat));
1647     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1648     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1649     else {
1650       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
1651       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1652       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1653     }
1654     if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
1655     else {
1656       PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
1657       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1658       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1659     }
1660     c             = (Mat_MPIDense *)a->cmat->data;
1661     c->sub_rbegin = rbegin;
1662     c->sub_rend   = rend;
1663     c->sub_cbegin = cbegin;
1664     c->sub_cend   = cend;
1665   }
1666   c = (Mat_MPIDense *)a->cmat->data;
1667   if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
1668     PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1669     PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1670     PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
1671     PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1672     PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1673     c->sub_rbegin = rbegin;
1674     c->sub_rend   = rend;
1675   }
1676   if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
1677     // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
1678     if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
1679       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1680       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1681       PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
1682       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1683       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1684       PetscCall(VecDestroy(&c->lvec));
1685       PetscCall(PetscSFDestroy(&c->Mvctx));
1686     }
1687     c->sub_cbegin = cbegin;
1688     c->sub_cend   = cend;
1689   }
1690   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1691   PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));
1692 
1693   a->cmat->preallocated = PETSC_TRUE;
1694   a->cmat->assembled    = PETSC_TRUE;
1695 #if defined(PETSC_HAVE_DEVICE)
1696   a->cmat->offloadmask = c->A->offloadmask;
1697 #endif
1698   a->matinuse = cbegin + 1;
1699   *v          = a->cmat;
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
1703 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1704 {
1705   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1706   Mat_MPIDense *c;
1707 
1708   PetscFunctionBegin;
1709   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1710   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1711   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1712   a->matinuse = 0;
1713   c           = (Mat_MPIDense *)a->cmat->data;
1714   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1715   if (v) *v = NULL;
1716 #if defined(PETSC_HAVE_DEVICE)
1717   A->offloadmask = a->A->offloadmask;
1718 #endif
1719   PetscFunctionReturn(PETSC_SUCCESS);
1720 }
1721 
1722 /*MC
1723    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1724 
1725    Options Database Key:
1726 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1727 
1728   Level: beginner
1729 
1730 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1731 M*/
1732 PetscErrorCode MatCreate_MPIDense(Mat mat)
1733 {
1734   Mat_MPIDense *a;
1735 
1736   PetscFunctionBegin;
1737   PetscCall(PetscNew(&a));
1738   mat->data   = (void *)a;
1739   mat->ops[0] = MatOps_Values;
1740 
1741   mat->insertmode = NOT_SET_VALUES;
1742 
1743   /* build cache for off-process entries formed during assembly */
1744   a->donotstash = PETSC_FALSE;
1745 
1746   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1747 
1748   /* stuff used for matrix vector multiply */
1749   a->lvec        = NULL;
1750   a->Mvctx       = NULL;
1751   a->roworiented = PETSC_TRUE;
1752 
1753   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1754   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1755   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1756   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1757   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1758   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1759   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1760   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1761   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1762   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1763   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1764   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1765   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1766   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1767   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1768   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1769   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1770   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1771   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1772   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1773   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1774 #if defined(PETSC_HAVE_ELEMENTAL)
1775   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1776 #endif
1777 #if defined(PETSC_HAVE_SCALAPACK)
1778   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1779 #endif
1780 #if defined(PETSC_HAVE_CUDA)
1781   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1782 #endif
1783   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1784   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1785   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1786 #if defined(PETSC_HAVE_CUDA)
1787   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1788   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1789 #endif
1790 #if defined(PETSC_HAVE_HIP)
1791   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1792   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1793   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1794 #endif
1795   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1796   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1797   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
1798   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1799   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1800   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
1801   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1802   PetscFunctionReturn(PETSC_SUCCESS);
1803 }
1804 
1805 /*MC
1806    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1807 
1808    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1809    and `MATMPIDENSE` otherwise.
1810 
1811    Options Database Key:
1812 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1813 
1814   Level: beginner
1815 
1816 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1817 M*/
1818 
1819 /*@
1820   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1821 
1822   Collective
1823 
1824   Input Parameters:
1825 + B    - the matrix
1826 - data - optional location of matrix data.  Set to `NULL` for PETSc
1827          to control all matrix memory allocation.
1828 
1829   Level: intermediate
1830 
1831   Notes:
1832   The dense format is fully compatible with standard Fortran
1833   storage by columns.
1834 
1835   The data input variable is intended primarily for Fortran programmers
1836   who wish to allocate their own matrix memory space.  Most users should
1837   set `data` to `NULL`.
1838 
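  Example Usage:
  A minimal sketch (the communicator `comm` and the sizes `m`, `n`, `M`, `N` are assumed to be defined by the caller); passing `NULL` lets PETSc allocate the storage:
.vb
  Mat B;
  PetscCall(MatCreate(comm, &B));
  PetscCall(MatSetSizes(B, m, n, M, N));
  PetscCall(MatSetType(B, MATMPIDENSE));
  PetscCall(MatMPIDenseSetPreallocation(B, NULL));
.ve
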
1839 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1840 @*/
1841 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1842 {
1843   PetscFunctionBegin;
1844   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1845   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1846   PetscFunctionReturn(PETSC_SUCCESS);
1847 }
1848 
1849 /*@
1850   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1851   array provided by the user. This is useful to avoid copying an array
1852   into a matrix.
1853 
1854   Not Collective
1855 
1856   Input Parameters:
1857 + mat   - the matrix
1858 - array - the array in column major order
1859 
1860   Level: developer
1861 
1862   Note:
1863   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1864 
1865   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1866   freed when the matrix is destroyed.
1867 
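  Example Usage:
  A minimal sketch of the place/use/reset pattern; `myarray` is an assumed user-provided buffer of the local size of `mat`, stored in column major order:
.vb
  PetscCall(MatDensePlaceArray(mat, myarray)); /* mat now wraps myarray, no copy is made */
  /* ... use mat ... */
  PetscCall(MatDenseResetArray(mat));          /* mat points back to its original array */
.ve
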
1868 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1869           `MatDenseReplaceArray()`
1870 @*/
1871 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1872 {
1873   PetscFunctionBegin;
1874   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1875   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1876   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1877 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1878   mat->offloadmask = PETSC_OFFLOAD_CPU;
1879 #endif
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 /*@
1884   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
1885 
1886   Not Collective
1887 
1888   Input Parameter:
1889 . mat - the matrix
1890 
1891   Level: developer
1892 
1893   Note:
1894   You can only call this after a call to `MatDensePlaceArray()`
1895 
1896 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1897 @*/
1898 PetscErrorCode MatDenseResetArray(Mat mat)
1899 {
1900   PetscFunctionBegin;
1901   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1902   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1903   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1904   PetscFunctionReturn(PETSC_SUCCESS);
1905 }
1906 
1907 /*@
1908   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1909   array provided by the user. This is useful to avoid copying an array
1910   into a matrix.
1911 
1912   Not Collective
1913 
1914   Input Parameters:
1915 + mat   - the matrix
1916 - array - the array in column major order
1917 
1918   Level: developer
1919 
1920   Note:
1921   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1922 
1923   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1924   freed by the user. It will be freed when the matrix is destroyed.
1925 
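  Example Usage:
  A minimal sketch; the array must come from `PetscMalloc1()` because the matrix takes ownership and frees it when destroyed (`m` and `n` are the assumed dimensions of the local array of `mat`):
.vb
  PetscScalar *a;
  PetscCall(PetscMalloc1(m * n, &a));
  /* ... fill a in column major order ... */
  PetscCall(MatDenseReplaceArray(mat, a)); /* do NOT free a yourself */
.ve
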
1926 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1927 @*/
1928 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1929 {
1930   PetscFunctionBegin;
1931   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1932   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1933   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1934 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1935   mat->offloadmask = PETSC_OFFLOAD_CPU;
1936 #endif
1937   PetscFunctionReturn(PETSC_SUCCESS);
1938 }
1939 
1940 /*@
1941   MatCreateDense - Creates a matrix in `MATDENSE` format.
1942 
1943   Collective
1944 
1945   Input Parameters:
1946 + comm - MPI communicator
1947 . m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1948 . n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1949 . M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1950 . N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1951 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
1952    to control all matrix memory allocation.
1953 
1954   Output Parameter:
1955 . A - the matrix
1956 
1957   Level: intermediate
1958 
1959   Notes:
1960   The dense format is fully compatible with standard Fortran
1961   storage by columns.
1962 
1963   Although local portions of the matrix are stored in column-major
1964   order, the matrix is partitioned across MPI ranks by row.
1965 
1966   The data input variable is intended primarily for Fortran programmers
1967   who wish to allocate their own matrix memory space.  Most users should
1968   set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).
1969 
1970   The user MUST specify either the local or global matrix dimensions
1971   (possibly both).
1972 
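  Example Usage:
  A minimal sketch that creates a 100 x 100 matrix, letting PETSc choose the row distribution and allocate the storage:
.vb
  Mat A;
  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 100, NULL, &A));
  /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
  PetscCall(MatDestroy(&A));
.ve
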
1973 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1974 @*/
1975 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
1976 {
1977   PetscFunctionBegin;
1978   PetscCall(MatCreate(comm, A));
1979   PetscCall(MatSetSizes(*A, m, n, M, N));
1980   PetscCall(MatSetType(*A, MATDENSE));
1981   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1982   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1983   PetscFunctionReturn(PETSC_SUCCESS);
1984 }
1985 
1986 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1987 {
1988   Mat           mat;
1989   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1990 
1991   PetscFunctionBegin;
1992   *newmat = NULL;
1993   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1994   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1995   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1996   a = (Mat_MPIDense *)mat->data;
1997 
1998   mat->factortype   = A->factortype;
1999   mat->assembled    = PETSC_TRUE;
2000   mat->preallocated = PETSC_TRUE;
2001 
2002   mat->insertmode = NOT_SET_VALUES;
2003   a->donotstash   = oldmat->donotstash;
2004 
2005   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2006   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2007 
2008   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2009 
2010   *newmat = mat;
2011   PetscFunctionReturn(PETSC_SUCCESS);
2012 }
2013 
2014 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2015 {
2016   PetscBool isbinary;
2017 #if defined(PETSC_HAVE_HDF5)
2018   PetscBool ishdf5;
2019 #endif
2020 
2021   PetscFunctionBegin;
2022   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2023   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2024   /* force binary viewer to load .info file if it has not yet done so */
2025   PetscCall(PetscViewerSetUp(viewer));
2026   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2027 #if defined(PETSC_HAVE_HDF5)
2028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2029 #endif
2030   if (isbinary) {
2031     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2032 #if defined(PETSC_HAVE_HDF5)
2033   } else if (ishdf5) {
2034     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2035 #endif
2036   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2037   PetscFunctionReturn(PETSC_SUCCESS);
2038 }
2039 
2040 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2041 {
2042   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2043   Mat           a, b;
2044 
2045   PetscFunctionBegin;
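  /* compare the local blocks, then require equality on every rank via a logical AND reduction */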
2046   a = matA->A;
2047   b = matB->A;
2048   PetscCall(MatEqual(a, b, flag));
2049   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2050   PetscFunctionReturn(PETSC_SUCCESS);
2051 }
2052 
2053 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2054 {
2055   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2056 
2057   PetscFunctionBegin;
2058   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2059   PetscCall(MatDestroy(&atb->atb));
2060   PetscCall(PetscFree(atb));
2061   PetscFunctionReturn(PETSC_SUCCESS);
2062 }
2063 
2064 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2065 {
2066   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2067 
2068   PetscFunctionBegin;
2069   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2070   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2071   PetscCall(PetscFree(abt));
2072   PetscFunctionReturn(PETSC_SUCCESS);
2073 }
2074 
2075 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2076 {
2077   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2078   Mat_TransMatMultDense *atb;
2079   MPI_Comm               comm;
2080   PetscMPIInt            size, *recvcounts;
2081   PetscScalar           *carray, *sendbuf;
2082   const PetscScalar     *atbarray;
2083   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
2084   const PetscInt        *ranges;
2085 
2086   PetscFunctionBegin;
2087   MatCheckProduct(C, 3);
2088   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2089   atb        = (Mat_TransMatMultDense *)C->product->data;
2090   recvcounts = atb->recvcounts;
2091   sendbuf    = atb->sendbuf;
2092 
2093   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2094   PetscCallMPI(MPI_Comm_size(comm, &size));
2095 
2096   /* compute atbarray = aseq^T * bseq */
2097   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));
2098 
2099   PetscCall(MatGetOwnershipRanges(C, &ranges));
2100 
2101   if (ranges[1] == C->rmap->N) {
2102     /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
2103     PetscInt           atb_lda, c_lda;
2104     Mat                atb_local = atb->atb;
2105     Mat                atb_alloc = NULL;
2106     Mat                c_local   = c->A;
2107     Mat                c_alloc   = NULL;
2108     PetscMemType       atb_memtype, c_memtype;
2109     const PetscScalar *atb_array = NULL;
2110     MPI_Datatype       vector_type;
2111     PetscScalar       *c_array = NULL;
2112     PetscMPIInt        rank;
2113 
2114     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2115 
2116     PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
2117     if (atb_lda != C->rmap->N) {
2118       // copy atb to a matrix that will have lda == the number of rows
2119       PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
2120       PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
2121       atb_local = atb_alloc;
2122     }
2123 
2124     if (rank == 0) {
2125       PetscCall(MatDenseGetLDA(c_local, &c_lda));
2126       if (c_lda != C->rmap->N) {
2127         // copy c to a matrix that will have lda == the number of rows
2128         PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
2129         c_local = c_alloc;
2130       }
2131       PetscCall(MatZeroEntries(c_local));
2132     }
2133     /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
2134      * B->cmap->N: use a->Mvctx to choose the best reduction method */
2135     if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
2136     vector_type = MPIU_SCALAR;
2137     if (B->cmap->N > 1) {
2138       PetscMPIInt mpi_N;
2139 
2140       PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
2141       PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
2142       PetscCallMPI(MPI_Type_commit(&vector_type));
2143     }
2144     PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
2145     PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
2146     PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
2147     PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
2148     PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
2149     PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
2150     if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
2151     if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
2152     PetscCall(MatDestroy(&atb_alloc));
2153     PetscCall(MatDestroy(&c_alloc));
2154     PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2155     PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2156     PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2157     PetscFunctionReturn(PETSC_SUCCESS);
2158   }
2159 
2160   /* arrange atbarray into sendbuf */
2161   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
2162   PetscCall(MatDenseGetLDA(atb->atb, &lda));
2163   for (proc = 0, k = 0; proc < size; proc++) {
2164     for (j = 0; j < cN; j++) {
2165       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2166     }
2167   }
2168   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2169 
2170   /* sum all atbarray to local values of C */
2171   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
2172   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
2173   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
2174   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2175   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2176   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2177   PetscFunctionReturn(PETSC_SUCCESS);
2178 }
2179 
2180 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2181 {
2182   MPI_Comm               comm;
2183   PetscMPIInt            size;
2184   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2185   Mat_TransMatMultDense *atb;
2186   PetscBool              cisdense = PETSC_FALSE;
2187   const PetscInt        *ranges;
2188 
2189   PetscFunctionBegin;
2190   MatCheckProduct(C, 4);
2191   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2192   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2193   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart,
2194              A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2195 
2196   /* create matrix product C */
2197   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2198 #if defined(PETSC_HAVE_CUDA)
2199   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2200 #endif
2201 #if defined(PETSC_HAVE_HIP)
2202   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2203 #endif
2204   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2205   PetscCall(MatSetUp(C));
2206 
2207   /* create data structure for reuse C */
2208   PetscCallMPI(MPI_Comm_size(comm, &size));
2209   PetscCall(PetscNew(&atb));
2210   cM = C->rmap->N;
2211   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2212   PetscCall(MatGetOwnershipRanges(C, &ranges));
2213   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2214   C->product->data    = atb;
2215   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2216   PetscFunctionReturn(PETSC_SUCCESS);
2217 }
2218 
2219 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2220 {
2221   MPI_Comm               comm;
2222   PetscMPIInt            i, size;
2223   PetscInt               maxRows, bufsiz;
2224   PetscMPIInt            tag;
2225   PetscInt               alg;
2226   Mat_MatTransMultDense *abt;
2227   Mat_Product           *product = C->product;
2228   PetscBool              flg;
2229 
2230   PetscFunctionBegin;
2231   MatCheckProduct(C, 4);
2232   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2233   /* check local size of A and B */
2234   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2235 
2236   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2237   alg = flg ? 0 : 1;
2238 
2239   /* setup matrix product C */
2240   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2241   PetscCall(MatSetType(C, MATMPIDENSE));
2242   PetscCall(MatSetUp(C));
2243   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2244 
2245   /* create data structure for reuse C */
2246   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2247   PetscCallMPI(MPI_Comm_size(comm, &size));
2248   PetscCall(PetscNew(&abt));
2249   abt->tag = tag;
2250   abt->alg = alg;
2251   switch (alg) {
2252   case 1: /* alg: "cyclic" */
2253     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2254     bufsiz = A->cmap->N * maxRows;
2255     PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2256     break;
2257   default: /* alg: "allgatherv" */
2258     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2259     PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2260     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2261     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2262     break;
2263   }
2264 
2265   C->product->data                = abt;
2266   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2267   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2268   PetscFunctionReturn(PETSC_SUCCESS);
2269 }
2270 
2271 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2272 {
2273   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2274   Mat_MatTransMultDense *abt;
2275   MPI_Comm               comm;
2276   PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
2277   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2278   PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
2279   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2280   const PetscScalar     *av, *bv;
2281   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2282   MPI_Request            reqs[2];
2283   const PetscInt        *ranges;
2284 
2285   PetscFunctionBegin;
2286   MatCheckProduct(C, 3);
2287   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2288   abt = (Mat_MatTransMultDense *)C->product->data;
2289   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2290   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2291   PetscCallMPI(MPI_Comm_size(comm, &size));
2292   PetscCall(MatDenseGetArrayRead(a->A, &av));
2293   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2294   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2295   PetscCall(MatDenseGetLDA(a->A, &i));
2296   PetscCall(PetscBLASIntCast(i, &alda));
2297   PetscCall(MatDenseGetLDA(b->A, &i));
2298   PetscCall(PetscBLASIntCast(i, &blda));
2299   PetscCall(MatDenseGetLDA(c->A, &i));
2300   PetscCall(PetscBLASIntCast(i, &clda));
2301   PetscCall(MatGetOwnershipRanges(B, &ranges));
2302   bn = B->rmap->n;
2303   if (blda == bn) {
2304     sendbuf = (PetscScalar *)bv;
2305   } else {
2306     sendbuf = abt->buf[0];
2307     for (k = 0, i = 0; i < cK; i++) {
2308       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2309     }
2310   }
2311   if (size > 1) {
2312     sendto   = (rank + size - 1) % size;
2313     recvfrom = (rank + size + 1) % size;
2314   } else {
2315     sendto = recvfrom = 0;
2316   }
2317   PetscCall(PetscBLASIntCast(cK, &ck));
2318   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2319   recvisfrom = rank;
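  /* pipeline: each iteration overlaps the local GEMM on the current chunk of B
     with the cyclic exchange that brings in the next rank's chunk */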
2320   for (i = 0; i < size; i++) {
2321     /* the previous receive and send have completed, so the buffers can be read/modified */
2322     PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2323     PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2324 
2325     if (nextrecvisfrom != rank) {
2326       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2327       sendsiz = cK * bn;
2328       recvsiz = cK * nextbn;
2329       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2330       PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2331       PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2332     }
2333 
2334     /* local aseq * sendbuf^T */
2335     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2336     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2337 
2338     if (nextrecvisfrom != rank) {
2339       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2340       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2341     }
2342     bn         = nextbn;
2343     recvisfrom = nextrecvisfrom;
2344     sendbuf    = recvbuf;
2345   }
2346   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2347   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2348   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2349   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2350   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2351   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2352   PetscFunctionReturn(PETSC_SUCCESS);
2353 }
2354 
2355 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2356 {
2357   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2358   Mat_MatTransMultDense *abt;
2359   MPI_Comm               comm;
2360   PetscMPIInt            size, ibn;
2361   PetscScalar           *cv, *sendbuf, *recvbuf;
2362   const PetscScalar     *av, *bv;
2363   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2364   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2365   PetscBLASInt           cm, cn, ck, alda, clda;
2366 
2367   PetscFunctionBegin;
2368   MatCheckProduct(C, 3);
2369   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2370   abt = (Mat_MatTransMultDense *)C->product->data;
2371   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2372   PetscCallMPI(MPI_Comm_size(comm, &size));
2373   PetscCall(MatDenseGetArrayRead(a->A, &av));
2374   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2375   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2376   PetscCall(MatDenseGetLDA(a->A, &i));
2377   PetscCall(PetscBLASIntCast(i, &alda));
2378   PetscCall(MatDenseGetLDA(b->A, &blda));
2379   PetscCall(MatDenseGetLDA(c->A, &i));
2380   PetscCall(PetscBLASIntCast(i, &clda));
2381   /* copy transpose of B into buf[0] */
2382   bn      = B->rmap->n;
2383   sendbuf = abt->buf[0];
2384   recvbuf = abt->buf[1];
2385   for (k = 0, j = 0; j < bn; j++) {
2386     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2387   }
2388   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2389   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2390   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2391   PetscCall(PetscBLASIntCast(cK, &ck));
2392   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2393   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2394   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2395   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2397   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2398   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2399   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2400   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2401   PetscFunctionReturn(PETSC_SUCCESS);
2402 }
2403 
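/*
   Layout note for the allgatherv kernel above: the j/i loop stores the local rows of B
   as columns of a cK x bn column-major block with leading dimension cK, so each rank's
   contribution is contiguous for MPI_Allgatherv and the gathered buffer can be fed
   straight to GEMM with ldb = ck.  The packing as a standalone sketch (`PackBTranspose`
   is a hypothetical helper, not a PETSc routine):

.vb
   static void PackBTranspose(PetscInt K, PetscInt nloc, PetscInt blda, const PetscScalar *bv, PetscScalar *buf)
   {
     PetscInt i, j, k;

     for (k = 0, j = 0; j < nloc; j++) {                       // local row j of B ...
       for (i = 0; i < K; i++, k++) buf[k] = bv[i * blda + j]; // ... becomes column j of buf
     }
   }
.ve
*/
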
2404 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2405 {
2406   Mat_MatTransMultDense *abt;
2407 
2408   PetscFunctionBegin;
2409   MatCheckProduct(C, 3);
2410   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2411   abt = (Mat_MatTransMultDense *)C->product->data;
2412   switch (abt->alg) {
2413   case 1:
2414     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2415     break;
2416   default:
2417     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2418     break;
2419   }
2420   PetscFunctionReturn(PETSC_SUCCESS);
2421 }
2422 
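/*
   Usage sketch for the two ABt algorithms dispatched above (A and B are assumed to be
   assembled MATMPIDENSE matrices with compatible sizes):

.vb
   Mat A, B, C;

   // default is "allgatherv"; "cyclic" can be requested at runtime with
   //   -matmattransmult_mpidense_mpidense_via cyclic   (MatMatTransposeMult() path)
   // or
   //   -mat_product_algorithm cyclic                   (MatProduct path)
   PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
.ve
*/
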
2423 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2424 {
2425   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2426 
2427   PetscFunctionBegin;
2428   PetscCall(MatDestroy(&ab->Ce));
2429   PetscCall(MatDestroy(&ab->Ae));
2430   PetscCall(MatDestroy(&ab->Be));
2431   PetscCall(PetscFree(ab));
2432   PetscFunctionReturn(PETSC_SUCCESS);
2433 }
2434 
2435 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2436 {
2437   Mat_MatMultDense *ab;
2438   Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
2439   Mat_MPIDense     *b   = (Mat_MPIDense *)B->data;
2440 
2441   PetscFunctionBegin;
2442   MatCheckProduct(C, 3);
2443   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2444   ab = (Mat_MatMultDense *)C->product->data;
2445   if (ab->Ae && ab->Ce) {
2446 #if PetscDefined(HAVE_ELEMENTAL)
2447     PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2448     PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2449     PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2450     PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2451 #else
2452     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2453 #endif
2454   } else {
2455     MPI_Comm           comm;
2456     const PetscScalar *read;
2457     PetscScalar       *write;
2458     PetscInt           lda;
2459     const PetscInt    *ranges;
2460     PetscMPIInt        size;
2461 
2462     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
2463     comm = PetscObjectComm((PetscObject)B);
2464     PetscCallMPI(MPI_Comm_size(comm, &size));
2465     PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
2466     if (ranges[1] == ranges[size]) {
      // all rows of B are owned by rank 0, so B can be replicated everywhere with a single broadcast
2468       PetscInt           b_lda, be_lda;
2469       Mat                b_local  = b->A;
2470       Mat                b_alloc  = NULL;
2471       Mat                be_local = ab->Be;
2472       Mat                be_alloc = NULL;
2473       PetscMemType       b_memtype, be_memtype;
2474       const PetscScalar *b_array = NULL;
2475       MPI_Datatype       vector_type;
2476       PetscScalar       *be_array = NULL;
2477       PetscMPIInt        rank;
2478 
2479       PetscCallMPI(MPI_Comm_rank(comm, &rank));
2480       PetscCall(MatDenseGetLDA(be_local, &be_lda));
2481       if (be_lda != B->rmap->N) {
2482         PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
2483         be_local = be_alloc;
2484       }
2485 
2486       if (rank == 0) {
2487         PetscCall(MatDenseGetLDA(b_local, &b_lda));
2488         if (b_lda != B->rmap->N) {
2489           PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
2490           PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
2491           b_local = b_alloc;
2492         }
2493       }
2494       vector_type = MPIU_SCALAR;
2495       if (B->cmap->N > 1) {
2496         PetscMPIInt mpi_N;
2497 
2498         PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
2499         PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
2500         PetscCallMPI(MPI_Type_commit(&vector_type));
2501       }
2502       PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
2503       PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
2504       PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
2505       PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
2506       PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
2507       PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
2508       if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
2509       if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
2510       PetscCall(MatDestroy(&be_alloc));
2511       PetscCall(MatDestroy(&b_alloc));
2512     } else {
2513       PetscCall(MatDenseGetLDA(B, &lda));
2514       PetscCall(MatDenseGetArrayRead(B, &read));
2515       PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
2516       for (PetscInt i = 0; i < C->cmap->N; ++i) {
2517         PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2518         PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2519       }
2520       PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
2521       PetscCall(MatDenseRestoreArrayRead(B, &read));
2522     }
2523     PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
2524   }
2525   PetscFunctionReturn(PETSC_SUCCESS);
2526 }
2527 
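/*
   The single-broadcast branch above is taken when rank 0 owns every row of B
   (ranges[1] == ranges[size]).  An illustrative way to set up such a B (the sizes are
   made up):

.vb
   Mat         B;
   PetscMPIInt rank;
   PetscInt    N = 100, ncols = 4;

   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
   // rank 0 holds all N rows; every other rank holds none
   PetscCall(MatCreateDense(PETSC_COMM_WORLD, rank == 0 ? N : 0, PETSC_DECIDE, N, ncols, NULL, &B));
.ve
*/
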
2528 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2529 {
2530   Mat_Product      *product = C->product;
2531   PetscInt          alg;
2532   Mat_MatMultDense *ab;
2533   PetscBool         flg;
2534 
2535   PetscFunctionBegin;
2536   MatCheckProduct(C, 4);
2537   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2538   /* check local size of A and B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
2541 
2542   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2543   alg = flg ? 0 : 1;
2544 
2545   /* setup C */
2546   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2547   PetscCall(MatSetType(C, MATMPIDENSE));
2548   PetscCall(MatSetUp(C));
2549 
  /* create data structure that is reused when the numeric product is recomputed */
2551   PetscCall(PetscNew(&ab));
2552 
2553   switch (alg) {
2554   case 1: /* alg: "elemental" */
2555 #if PetscDefined(HAVE_ELEMENTAL)
2556     /* create elemental matrices Ae and Be */
2557     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2558     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2559     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2560     PetscCall(MatSetUp(ab->Ae));
2561     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2562 
2563     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2564     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2565     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2566     PetscCall(MatSetUp(ab->Be));
2567     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2568 
2569     /* compute symbolic Ce = Ae*Be */
2570     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2571     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2572 #else
2573     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2574 #endif
2575     break;
2576   default: /* alg: "petsc" */
2577     ab->Ae = NULL;
2578     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2579     ab->Ce = NULL;
2580     break;
2581   }
2582 
2583   C->product->data       = ab;
2584   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2585   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
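/*
   Staged use of the symbolic/numeric split above through the MatProduct API; a sketch
   assuming assembled MATMPIDENSE inputs.  The "elemental" algorithm is only available
   when PETSc was configured with Elemental (e.g. --download-elemental), otherwise the
   default "petsc" algorithm applies:

.vb
   Mat A, B, C;

   PetscCall(MatProductCreate(A, B, NULL, &C));
   PetscCall(MatProductSetType(C, MATPRODUCT_AB));
   PetscCall(MatProductSetAlgorithm(C, "petsc"));
   PetscCall(MatProductSetFromOptions(C));
   PetscCall(MatProductSymbolic(C)); // MatMatMultSymbolic_MPIDense_MPIDense
   PetscCall(MatProductNumeric(C));  // MatMatMultNumeric_MPIDense_MPIDense
.ve
*/
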
2589 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2590 {
2591   Mat_Product *product     = C->product;
2592   const char  *algTypes[2] = {"petsc", "elemental"};
2593   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2594   PetscBool    flg = PETSC_FALSE;
2595 
2596   PetscFunctionBegin;
2597   /* Set default algorithm */
2598   alg = 0; /* default is PETSc */
2599   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2600   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2601 
2602   /* Get runtime option */
2603   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2604   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2605   PetscOptionsEnd();
2606   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2607 
2608   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2609   C->ops->productsymbolic = MatProductSymbolic_AB;
2610   PetscFunctionReturn(PETSC_SUCCESS);
2611 }
2612 
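/*
   The runtime option consulted above can also be set programmatically before the
   product is configured (a sketch; NULL selects the global options database):

.vb
   PetscCall(PetscOptionsSetValue(NULL, "-mat_product_algorithm", "elemental"));
   // a subsequent MatProductSetFromOptions() on an AB product picks this up
.ve
*/
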
2613 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2614 {
2615   Mat_Product *product = C->product;
2616   Mat          A = product->A, B = product->B;
2617 
2618   PetscFunctionBegin;
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2621   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2622   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2623   PetscFunctionReturn(PETSC_SUCCESS);
2624 }
2625 
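/*
   Usage sketch for the AtB product configured above: C = A^T * B requires A and B to
   share the same row layout, which is exactly the check performed:

.vb
   Mat A, B, C; // A is m x k, B is m x n, identical row distribution

   PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); // C is k x n
.ve
*/
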
2626 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2627 {
2628   Mat_Product *product     = C->product;
2629   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2630   PetscInt     alg, nalg = 2;
2631   PetscBool    flg = PETSC_FALSE;
2632 
2633   PetscFunctionBegin;
2634   /* Set default algorithm */
2635   alg = 0; /* default is allgatherv */
2636   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2637   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2638 
2639   /* Get runtime option */
2640   if (product->api_user) {
2641     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2642     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2643     PetscOptionsEnd();
2644   } else {
2645     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2646     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2647     PetscOptionsEnd();
2648   }
2649   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2650 
2651   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2652   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2653   PetscFunctionReturn(PETSC_SUCCESS);
2654 }
2655 
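/*
   MatProduct form of C = A*B^T; on this path product->api_user is PETSC_FALSE, so
   -mat_product_algorithm {allgatherv|cyclic} is the option consulted rather than
   -matmattransmult_mpidense_mpidense_via.  A sketch with assembled MATMPIDENSE inputs:

.vb
   Mat A, B, C;

   PetscCall(MatProductCreate(A, B, NULL, &C));
   PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
   PetscCall(MatProductSetFromOptions(C));
   PetscCall(MatProductSymbolic(C));
   PetscCall(MatProductNumeric(C));
.ve
*/
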
2656 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2657 {
2658   Mat_Product *product = C->product;
2659 
2660   PetscFunctionBegin;
2661   switch (product->type) {
2662   case MATPRODUCT_AB:
2663     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2664     break;
2665   case MATPRODUCT_AtB:
2666     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2667     break;
2668   case MATPRODUCT_ABt:
2669     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2670     break;
2671   default:
2672     break;
2673   }
2674   PetscFunctionReturn(PETSC_SUCCESS);
2675 }
2676