xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5b669ad3ce6cd664db9332dbc899e086ae7cd27f)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 #include <petscdevice.h>
12 #include <petsc/private/deviceimpl.h>
13 
14 /*@
15   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
16   matrix that represents the operator. For sequential matrices it returns itself.
17 
18   Input Parameter:
19 . A - the sequential or MPI `MATDENSE` matrix
20 
21   Output Parameter:
22 . B - the inner matrix
23 
24   Level: intermediate
25 
26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
27 @*/
28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
29 {
30   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
31   PetscBool     flg;
32 
33   PetscFunctionBegin;
34   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
35   PetscAssertPointer(B, 2);
36   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
37   if (flg) *B = mat->A;
38   else {
39     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
40     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
41     *B = A;
42   }
43   PetscFunctionReturn(PETSC_SUCCESS);
44 }
45 
46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
47 {
48   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
49   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
50 
51   PetscFunctionBegin;
52   PetscCall(MatCopy(Amat->A, Bmat->A, s));
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
57 {
58   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
59   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
60   PetscScalar  *v;
61 
62   PetscFunctionBegin;
63   PetscCall(MatDenseGetArray(mat->A, &v));
64   PetscCall(MatDenseGetLDA(mat->A, &lda));
65   rend2 = PetscMin(rend, A->cmap->N);
66   if (rend2 > rstart) {
67     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
68     PetscCall(PetscLogFlops(rend2 - rstart));
69   }
70   PetscCall(MatDenseRestoreArray(mat->A, &v));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
75 {
76   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
77   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
78 
79   PetscFunctionBegin;
80   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
81   lrow = row - rstart;
82   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
87 {
88   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
89   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
90 
91   PetscFunctionBegin;
92   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
93   lrow = row - rstart;
94   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
/* Return the square diagonal block owned by this process as a sequential dense matrix.
   The block is created once, cached on A under the key "DiagonalBlock", and aliases the
   local storage of A (no copy). */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B wraps the local array starting at column rstart.
       NOTE(review): offset m*rstart assumes the leading dimension of mdn->A equals m;
       confirm behavior when MatDenseSetLDA() has set a larger LDA. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* the composition keeps B alive; destroy our local reference so the cache owns it */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
132 
/* Insert or add a block of values. Locally owned rows go straight into the local
   sequential block; off-process rows are stashed (unless donotstash is set) and
   communicated during MatAssemblyBegin/End. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v stored by rows: row i of the block starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* v stored by columns: entry (i,j) of the block is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it and mark the matrix unassembled so assembly scatters it */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
165 
166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
167 {
168   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
169   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
170 
171   PetscFunctionBegin;
172   for (i = 0; i < m; i++) {
173     if (idxm[i] < 0) continue; /* negative row */
174     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
175     if (idxm[i] >= rstart && idxm[i] < rend) {
176       row = idxm[i] - rstart;
177       for (j = 0; j < n; j++) {
178         if (idxn[j] < 0) continue; /* negative column */
179         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
180         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
181       }
182     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
188 {
189   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
190 
191   PetscFunctionBegin;
192   PetscCall(MatDenseGetLDA(a->A, lda));
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
/* Set the leading dimension of the local storage. If the inner sequential matrix does
   not exist yet it is created here, matching the host/device flavor of the outer matrix. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds all N global columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA; /* keep the inner matrix on the device too */
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP; /* keep the inner matrix on the device too */
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
222 
223 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
224 {
225   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
226 
227   PetscFunctionBegin;
228   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
229   PetscCall(MatDenseGetArray(a->A, array));
230   PetscFunctionReturn(PETSC_SUCCESS);
231 }
232 
233 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
234 {
235   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
236 
237   PetscFunctionBegin;
238   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
239   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
243 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
244 {
245   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
246 
247   PetscFunctionBegin;
248   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
249   PetscCall(MatDenseGetArrayWrite(a->A, array));
250   PetscFunctionReturn(PETSC_SUCCESS);
251 }
252 
253 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
254 {
255   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
256 
257   PetscFunctionBegin;
258   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
259   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
260   PetscCall(MatDensePlaceArray(a->A, array));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
265 {
266   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
267 
268   PetscFunctionBegin;
269   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
270   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
271   PetscCall(MatDenseResetArray(a->A));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
276 {
277   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
278 
279   PetscFunctionBegin;
280   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
281   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
282   PetscCall(MatDenseReplaceArray(a->A, array));
283   PetscFunctionReturn(PETSC_SUCCESS);
284 }
285 
/* Extract the submatrix A(isrow, iscol). Row indices must be locally owned (the row
   partitioning of the result matches A); the column IS is gathered onto all processes.
   The copy is done column by column from the column-major local storage. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full column index list */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* column icol[i] of the source; irow entries are global, shift by rstart for local access.
       NOTE(review): bv is advanced densely, which assumes the destination LDA equals nrows — confirm */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
352 
353 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
354 {
355   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
356 
357   PetscFunctionBegin;
358   PetscCall(MatDenseRestoreArray(a->A, array));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
363 {
364   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
365 
366   PetscFunctionBegin;
367   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 
371 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
372 {
373   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
374 
375   PetscFunctionBegin;
376   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
/* Begin assembly: start communicating entries stashed for other processes */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* no off-process entries were ever stashed, so there is nothing to communicate */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
393 
/* Finish assembly: drain incoming stash messages, insert the received values
   (grouped by row so each group is one MatSetValues call), then assemble the
   local sequential block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* all messages received */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
427 
428 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
429 {
430   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
431 
432   PetscFunctionBegin;
433   PetscCall(MatZeroEntries(l->A));
434   PetscFunctionReturn(PETSC_SUCCESS);
435 }
436 
/* Zero the locally owned rows listed (global indices), optionally fixing the right-hand
   side b[r] = diag*x[r] for those rows and setting the diagonal of the zeroed rows */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block with zero diagonal first */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): VecSet + MatDiagonalSet(INSERT_VALUES) writes `diag` onto the ENTIRE
       diagonal, not only the zeroed rows; confirm this matches the intended MatZeroRows
       contract for non-zeroed rows */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
468 
469 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
471 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
472 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
473 
/* yy = A*xx using only columns [c_start, c_end): broadcast the distributed xx into the
   local work vector lvec via the SF, then dispatch to the local column-range kernel */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host and device arrays both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
492 
/* yy = A*xx: broadcast the distributed xx into the local work vector lvec via the SF,
   then apply the local sequential multiply */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host and device arrays both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
511 
/* zz = yy + A*xx restricted to columns [c_start, c_end): broadcast xx into lvec,
   then dispatch to the local add-multiply column-range kernel */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host and device arrays both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
530 
/* zz = yy + A*xx: broadcast xx into the local work vector lvec via the SF,
   then apply the local sequential multiply-add */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast so host and device arrays both work */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
/* yy = A^H*xx restricted to columns [c_start, c_end): apply the local Hermitian-transpose
   column-range kernel into lvec, then sum contributions across processes into yy with an
   SF reduce. The slice of yy owned locally that overlaps [c_start, c_end) is zeroed first
   so the reduce accumulates into a clean target. */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build SF and lvec */
  PetscCall(VecZeroEntries(a->lvec));
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  /* intersect the requested column range with the locally owned part of yy */
  c_start_local = PetscMax(c_start, r_start);
  c_end_local   = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    } else {
      /* device memory: register then zero through the device API */
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  /* sum each process's partial result into the distributed yy */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
582 
/* Shared kernel for yy = A^T*xx (herm == PETSC_FALSE) or yy = A^H*xx (herm == PETSC_TRUE):
   apply the local (Hermitian) transpose multiply into lvec, then sum the partial results
   across processes into yy with an SF reduce */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build SF and lvec */
  PetscCall(VecSet(yy, 0.0));                             /* reduce accumulates, so start from zero */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
603 
/* zz = yy + A^H*xx restricted to columns [c_start, c_end): copy yy into zz, apply the
   local Hermitian-transpose column-range kernel into lvec, then sum the partial results
   across processes into zz with an SF reduce */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build SF and lvec */
  PetscCall(VecCopy(yy, zz));
  PetscCall(VecZeroEntries(a->lvec)); /* kernel only fills columns in range; rest must be zero */
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
624 
/* Shared kernel for zz = yy + A^T*xx (herm == PETSC_FALSE) or zz = yy + A^H*xx
   (herm == PETSC_TRUE): copy yy into zz, apply the local (Hermitian) transpose multiply
   into lvec, then sum the partial results across processes into zz with an SF reduce */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build SF and lvec */
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
645 
/* yy = A^T*xx (plain transpose variant of the shared kernel) */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
652 
/* zz = yy + A^T*xx (plain transpose variant of the shared add kernel) */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
659 
/* yy = A^H*xx (Hermitian-transpose variant of the shared kernel) */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
666 
/* zz = yy + A^H*xx (Hermitian-transpose variant of the shared add kernel) */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
673 
674 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
675 {
676   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
677   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
678   PetscScalar       *x;
679   const PetscScalar *av;
680 
681   PetscFunctionBegin;
682   PetscCall(VecGetArray(v, &x));
683   PetscCall(VecGetSize(v, &ng));
684   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
685   PetscCall(VecGetLocalSize(v, &nl));
686   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
687   radd = A->rmap->rstart * m;
688   PetscCall(MatDenseGetArrayRead(a->A, &av));
689   PetscCall(MatDenseGetLDA(a->A, &lda));
690   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
691   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
692   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
693   PetscCall(VecRestoreArray(v, &x));
694   PetscFunctionReturn(PETSC_SUCCESS);
695 }
696 
/* Destroy a MATMPIDENSE matrix: release the local block and communication
   objects, then detach every composed method so the object can be safely
   retyped or freed. Errors out if a column vector or submatrix obtained from
   this matrix has not been restored yet. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* Refuse to destroy while borrowed views of the data are still outstanding */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Free the sequential local block and the objects used for parallel MatMult
     (local work vector and star forest) plus cached column vector/submatrix */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* Remove every composed function; this list must mirror the compose calls
     made when the type was created */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* Drop the cached diagonal-block reference composed on the object */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
795 
796 #include <petscdraw.h>
797 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
798 {
799   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
800   PetscMPIInt       rank;
801   PetscViewerType   vtype;
802   PetscBool         iascii, isdraw;
803   PetscViewer       sviewer;
804   PetscViewerFormat format;
805 
806   PetscFunctionBegin;
807   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
808   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
809   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
810   if (iascii) {
811     PetscCall(PetscViewerGetType(viewer, &vtype));
812     PetscCall(PetscViewerGetFormat(viewer, &format));
813     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
814       MatInfo info;
815       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
816       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
817       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
818                                                    (PetscInt)info.memory));
819       PetscCall(PetscViewerFlush(viewer));
820       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
821       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
822       PetscFunctionReturn(PETSC_SUCCESS);
823     } else if (format == PETSC_VIEWER_ASCII_INFO) {
824       PetscFunctionReturn(PETSC_SUCCESS);
825     }
826   } else if (isdraw) {
827     PetscDraw draw;
828     PetscBool isnull;
829 
830     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
831     PetscCall(PetscDrawIsNull(draw, &isnull));
832     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
833   }
834 
835   {
836     /* assemble the entire matrix onto first processor. */
837     Mat          A;
838     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
839     PetscInt    *cols;
840     PetscScalar *vals;
841 
842     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
843     if (rank == 0) {
844       PetscCall(MatSetSizes(A, M, N, M, N));
845     } else {
846       PetscCall(MatSetSizes(A, 0, 0, M, N));
847     }
848     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
849     PetscCall(MatSetType(A, MATMPIDENSE));
850     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
851 
852     /* Copy the matrix ... This isn't the most efficient means,
853        but it's quick for now */
854     A->insertmode = INSERT_VALUES;
855 
856     row = mat->rmap->rstart;
857     m   = mdn->A->rmap->n;
858     for (i = 0; i < m; i++) {
859       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
860       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
861       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
862       row++;
863     }
864 
865     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
866     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
867     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
868     if (rank == 0) {
869       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
870       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
871     }
872     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
873     PetscCall(MatDestroy(&A));
874   }
875   PetscFunctionReturn(PETSC_SUCCESS);
876 }
877 
878 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
879 {
880   PetscBool iascii, isbinary, isdraw, issocket;
881 
882   PetscFunctionBegin;
883   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
884   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
885   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
886   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
887 
888   if (iascii || issocket || isdraw) {
889     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
890   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
891   PetscFunctionReturn(PETSC_SUCCESS);
892 }
893 
894 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
895 {
896   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
897   Mat            mdn = mat->A;
898   PetscLogDouble isend[5], irecv[5];
899 
900   PetscFunctionBegin;
901   info->block_size = 1.0;
902 
903   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
904 
905   isend[0] = info->nz_used;
906   isend[1] = info->nz_allocated;
907   isend[2] = info->nz_unneeded;
908   isend[3] = info->memory;
909   isend[4] = info->mallocs;
910   if (flag == MAT_LOCAL) {
911     info->nz_used      = isend[0];
912     info->nz_allocated = isend[1];
913     info->nz_unneeded  = isend[2];
914     info->memory       = isend[3];
915     info->mallocs      = isend[4];
916   } else if (flag == MAT_GLOBAL_MAX) {
917     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
918 
919     info->nz_used      = irecv[0];
920     info->nz_allocated = irecv[1];
921     info->nz_unneeded  = irecv[2];
922     info->memory       = irecv[3];
923     info->mallocs      = irecv[4];
924   } else if (flag == MAT_GLOBAL_SUM) {
925     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
926 
927     info->nz_used      = irecv[0];
928     info->nz_allocated = irecv[1];
929     info->nz_unneeded  = irecv[2];
930     info->memory       = irecv[3];
931     info->mallocs      = irecv[4];
932   }
933   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
934   info->fill_ratio_needed = 0;
935   info->factor_mallocs    = 0;
936   PetscFunctionReturn(PETSC_SUCCESS);
937 }
938 
939 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
940 {
941   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
942 
943   PetscFunctionBegin;
944   switch (op) {
945   case MAT_NEW_NONZERO_LOCATIONS:
946   case MAT_NEW_NONZERO_LOCATION_ERR:
947   case MAT_NEW_NONZERO_ALLOCATION_ERR:
948     MatCheckPreallocated(A, 1);
949     PetscCall(MatSetOption(a->A, op, flg));
950     break;
951   case MAT_ROW_ORIENTED:
952     MatCheckPreallocated(A, 1);
953     a->roworiented = flg;
954     PetscCall(MatSetOption(a->A, op, flg));
955     break;
956   case MAT_IGNORE_OFF_PROC_ENTRIES:
957     a->donotstash = flg;
958     break;
959   case MAT_SYMMETRIC:
960   case MAT_STRUCTURALLY_SYMMETRIC:
961   case MAT_HERMITIAN:
962   case MAT_SYMMETRY_ETERNAL:
963   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
964   case MAT_SPD:
965   case MAT_SPD_ETERNAL:
966     /* if the diagonal matrix is square it inherits some of the properties above */
967     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
968     break;
969   default:
970     break;
971   }
972   PetscFunctionReturn(PETSC_SUCCESS);
973 }
974 
/* Scale A by diagonal matrices: A <- diag(ll) * A * diag(rr).
   Either vector may be NULL to skip that side. ll conforms to the local rows;
   rr is distributed like the columns and is broadcast into the local work
   vector lvec so every rank sees the scaling for all columns of its block. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* Row scaling: walk row i across the columns with stride lda */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* Lazily build the SF, then gather rr's entries for all columns into lvec */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* Column scaling: column i is contiguous starting at vv + i*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1023 
/* Compute a norm of a MATMPIDENSE matrix. On a single rank the computation is
   delegated to the local block; otherwise Frobenius/1-norm are accumulated
   locally and combined with an Allreduce, and the infinity norm takes the
   max of the per-rank row norms.
   NOTE(review): the Frobenius and 1-norm loops read the local array as a
   contiguous run of rmap->n * cmap->n entries, i.e. they assume the leading
   dimension equals the local row count — TODO confirm (compare the lda-aware
   walks in MatDiagonalScale_MPIDense). */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* Sum of |a_ij|^2 over the local block, reduced globally, then sqrt */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* Max column sum: accumulate per-column absolute sums, reduce, take max */
      PetscReal *tmp;

      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1073 
/* Transpose a MATMPIDENSE matrix. Builds (or reuses) a matrix B with swapped
   layout and inserts each local column of A as a row of B via MatSetValues,
   letting assembly route off-process entries. For MAT_INPLACE_MATRIX, B's
   guts replace A's via MatHeaderMerge. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* Fresh result matrix with row/column layouts swapped relative to A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices of A's local rows; they become
     the column indices of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* Column j of the local block (contiguous, length m) becomes global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* In-place: move B's contents into A and destroy the shell of B */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1112 
1113 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1114 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1115 
/* Finalize layouts and, if the user has not preallocated, allocate default
   (zero-initialized) storage for the matrix. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1124 
1125 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1126 {
1127   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1128 
1129   PetscFunctionBegin;
1130   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1135 {
1136   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1137 
1138   PetscFunctionBegin;
1139   PetscCall(MatConjugate(a->A));
1140   PetscFunctionReturn(PETSC_SUCCESS);
1141 }
1142 
1143 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1144 {
1145   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1146 
1147   PetscFunctionBegin;
1148   PetscCall(MatRealPart(a->A));
1149   PetscFunctionReturn(PETSC_SUCCESS);
1150 }
1151 
1152 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1153 {
1154   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1155 
1156   PetscFunctionBegin;
1157   PetscCall(MatImaginaryPart(a->A));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1162 {
1163   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1167   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1168   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1173 
/* Compute per-column reductions (norms, sums, means) of a MATMPIDENSE matrix.
   Local per-column values come from the sequential kernel; they are then
   combined across ranks (MAX for the infinity norm, SUM otherwise) and
   post-processed (sqrt for NORM_2, divide by global row count for means).
   NOTE(review): `type` is compared against both NORM_* and REDUCTION_*
   constants, so this relies on those enum values not colliding in a harmful
   way — TODO confirm against petscmat.h. */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* Means are computed as global sums first, divided by m at the end */
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  /* For the 2-norm, square local norms so they sum correctly across ranks */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1203 
1204 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1205 {
1206   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(MatSetRandom(d->A, rctx));
1210 #if defined(PETSC_HAVE_DEVICE)
1211   x->offloadmask = d->A->offloadmask;
1212 #endif
1213   PetscFunctionReturn(PETSC_SUCCESS);
1214 }
1215 
/* A dense matrix stores every entry explicitly, so no diagonal entry can be
   missing; `d` is intentionally left untouched since there is no location to
   report. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1222 
1223 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1224 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1225 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1226 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1227 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1228 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1229 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1230 
/* Operation table for MATMPIDENSE. Slot positions (noted in the interleaved
   comments) must match struct _MatOps; NULL slots either have no
   implementation or fall back to generic Mat behavior. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIDense,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       /* 89*/ NULL,
                                       MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       /*104*/ MatMissingDiagonal_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*120*/ NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL};
1374 
/* Preallocate the storage of the inner sequential matrix (or wrap the user-provided
   `data` array), creating the inner matrix if needed.  The inner matrix holds all
   local rows and ALL global columns; its type is matched to any device (CUDA/HIP)
   outer type. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* inner sequential matrix is mat->rmap->n by mat->cmap->N (all global columns) */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* select the matching sequential device type when the outer matrix is CUDA/HIP */
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* inner matrix decides where its freshly (pre)allocated data lives */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1407 
1408 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1409 {
1410   Mat B, C;
1411 
1412   PetscFunctionBegin;
1413   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1414   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1415   PetscCall(MatDestroy(&C));
1416   if (reuse == MAT_REUSE_MATRIX) {
1417     C = *newmat;
1418   } else C = NULL;
1419   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1420   PetscCall(MatDestroy(&B));
1421   if (reuse == MAT_INPLACE_MATRIX) {
1422     PetscCall(MatHeaderReplace(A, &C));
1423   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1424   PetscFunctionReturn(PETSC_SUCCESS);
1425 }
1426 
1427 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1428 {
1429   Mat B, C;
1430 
1431   PetscFunctionBegin;
1432   PetscCall(MatDenseGetLocalMatrix(A, &C));
1433   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1434   if (reuse == MAT_REUSE_MATRIX) {
1435     C = *newmat;
1436   } else C = NULL;
1437   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1438   PetscCall(MatDestroy(&B));
1439   if (reuse == MAT_INPLACE_MATRIX) {
1440     PetscCall(MatHeaderReplace(A, &C));
1441   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1442   PetscFunctionReturn(PETSC_SUCCESS);
1443 }
1444 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format by inserting every locally
   owned entry with MatSetValues()/ADD_VALUES and letting assembly redistribute. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* zero first since all entries are accumulated with ADD_VALUES below */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* values are passed column-by-column (column-major dense storage) */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global indices: all N columns, and this rank's contiguous row range */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* lda == m: local storage is contiguous, insert everything in one call;
     otherwise columns are strided by lda, insert one column at a time */
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1489 
1490 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1491 {
1492   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1493 
1494   PetscFunctionBegin;
1495   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1500 {
1501   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1502 
1503   PetscFunctionBegin;
1504   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1505   PetscFunctionReturn(PETSC_SUCCESS);
1506 }
1507 
1508 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1509 {
1510   Mat_MPIDense *mat;
1511   PetscInt      m, nloc, N;
1512 
1513   PetscFunctionBegin;
1514   PetscCall(MatGetSize(inmat, &m, &N));
1515   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1516   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1517     PetscInt sum;
1518 
1519     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1520     /* Check sum(n) = N */
1521     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1522     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1523 
1524     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1525     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1526   }
1527 
1528   /* numeric phase */
1529   mat = (Mat_MPIDense *)(*outmat)->data;
1530   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1531   PetscFunctionReturn(PETSC_SUCCESS);
1532 }
1533 
1534 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1535 {
1536   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1537   PetscInt      lda;
1538 
1539   PetscFunctionBegin;
1540   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1541   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1542   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1543   a->vecinuse = col + 1;
1544   PetscCall(MatDenseGetLDA(a->A, &lda));
1545   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1546   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1547   *v = a->cvec;
1548   PetscFunctionReturn(PETSC_SUCCESS);
1549 }
1550 
1551 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1552 {
1553   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1554 
1555   PetscFunctionBegin;
1556   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1557   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1558   VecCheckAssembled(a->cvec);
1559   a->vecinuse = 0;
1560   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1561   PetscCall(VecResetArray(a->cvec));
1562   if (v) *v = NULL;
1563   PetscFunctionReturn(PETSC_SUCCESS);
1564 }
1565 
1566 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1567 {
1568   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1569   PetscInt      lda;
1570 
1571   PetscFunctionBegin;
1572   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1573   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1574   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1575   a->vecinuse = col + 1;
1576   PetscCall(MatDenseGetLDA(a->A, &lda));
1577   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1578   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1579   PetscCall(VecLockReadPush(a->cvec));
1580   *v = a->cvec;
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
/* Return the read-only column vector handed out by MatDenseGetColumnVecRead_MPIDense():
   releases the read access on the inner matrix array, pops the read lock, and detaches
   the placed array from the cached vector. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0; /* mark the cached vector as free again */
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  /* the lock must be popped before the placed array can be reset */
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1599 
1600 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1601 {
1602   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1603   PetscInt      lda;
1604 
1605   PetscFunctionBegin;
1606   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1607   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1608   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1609   a->vecinuse = col + 1;
1610   PetscCall(MatDenseGetLDA(a->A, &lda));
1611   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1612   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1613   *v = a->cvec;
1614   PetscFunctionReturn(PETSC_SUCCESS);
1615 }
1616 
/* Return the write-only column vector handed out by MatDenseGetColumnVecWrite_MPIDense():
   releases the write access on the inner matrix array and detaches the placed array
   from the cached vector. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0; /* mark the cached vector as free again */
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1631 
/* Expose the global submatrix [rbegin,rend) x [cbegin,cend) as the cached matrix
   a->cmat, whose local block views the inner matrix storage (no copy).  The cached
   matrix and its layouts are reused across calls; layouts are rebuilt only when the
   requested row/column range changes. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global ranges to this rank's ownership range, yielding
     local offsets into the owned rows/columns */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached submatrix and its layouts; share A's layouts
       when the requested range covers all rows/columns */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    /* remember the range the cached layouts were built for */
    c             = (Mat_MPIDense *)a->cmat->data;
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    /* row range changed: rebuild the cached row layout */
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      /* column layout changed: rebuild it and drop the (now stale) mat-vec
         communication objects */
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* local rows but GLOBAL columns on the inner matrix (it stores all N columns) */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero marks the cached matrix as handed out */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1704 
/* Return the submatrix handed out by MatDenseGetSubMatrix_MPIDense(): releases the
   inner (sequential) submatrix view and marks the cached matrix as free again.  The
   cached matrix itself is kept for reuse by the next call. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the submatrix may have moved data; resync the outer offload state */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1723 
1724 /*MC
1725    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1726 
1727    Options Database Key:
1728 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1729 
1730   Level: beginner
1731 
1732 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1733 M*/
/* Type constructor for MATMPIDENSE: installs the function table, creates the stash
   used for off-process MatSetValues(), and composes the dense-specific methods and
   conversion/product hooks on the object. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific methods, dispatched through PetscTryMethod/PetscUseMethod */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-matrix product hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1806 
1807 /*MC
1808    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1809 
1810    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1811    and `MATMPIDENSE` otherwise.
1812 
1813    Options Database Key:
1814 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1815 
1816   Level: beginner
1817 
1818 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1819 M*/
1820 
1821 /*@
1822   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1823 
1824   Collective
1825 
1826   Input Parameters:
1827 + B    - the matrix
1828 - data - optional location of matrix data.  Set to `NULL` for PETSc
1829          to control all matrix memory allocation.
1830 
1831   Level: intermediate
1832 
1833   Notes:
1834   The dense format is fully compatible with standard Fortran
1835   storage by columns.
1836 
1837   The data input variable is intended primarily for Fortran programmers
1838   who wish to allocate their own matrix memory space.  Most users should
1839   set `data` to `NULL`.
1840 
1841 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1842 @*/
1843 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1844 {
1845   PetscFunctionBegin;
1846   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1847   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1848   PetscFunctionReturn(PETSC_SUCCESS);
1849 }
1850 
1851 /*@
1852   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1853   array provided by the user. This is useful to avoid copying an array
1854   into a matrix
1855 
1856   Not Collective
1857 
1858   Input Parameters:
1859 + mat   - the matrix
1860 - array - the array in column major order
1861 
1862   Level: developer
1863 
1864   Note:
1865   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1866 
1867   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1868   freed when the matrix is destroyed.
1869 
1870 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1871           `MatDenseReplaceArray()`
1872 @*/
1873 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1874 {
1875   PetscFunctionBegin;
1876   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1877   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1878   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1879 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1880   mat->offloadmask = PETSC_OFFLOAD_CPU;
1881 #endif
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 /*@
1886   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1887 
1888   Not Collective
1889 
1890   Input Parameter:
1891 . mat - the matrix
1892 
1893   Level: developer
1894 
1895   Note:
1896   You can only call this after a call to `MatDensePlaceArray()`
1897 
1898 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1899 @*/
1900 PetscErrorCode MatDenseResetArray(Mat mat)
1901 {
1902   PetscFunctionBegin;
1903   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1904   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1905   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1906   PetscFunctionReturn(PETSC_SUCCESS);
1907 }
1908 
1909 /*@
1910   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1911   array provided by the user. This is useful to avoid copying an array
1912   into a matrix
1913 
1914   Not Collective
1915 
1916   Input Parameters:
1917 + mat   - the matrix
1918 - array - the array in column major order
1919 
1920   Level: developer
1921 
1922   Note:
1923   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1924 
1925   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1926   freed by the user. It will be freed when the matrix is destroyed.
1927 
1928 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1929 @*/
1930 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1931 {
1932   PetscFunctionBegin;
1933   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1934   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1935   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1936 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1937   mat->offloadmask = PETSC_OFFLOAD_CPU;
1938 #endif
1939   PetscFunctionReturn(PETSC_SUCCESS);
1940 }
1941 
1942 /*@
1943   MatCreateDense - Creates a matrix in `MATDENSE` format.
1944 
1945   Collective
1946 
1947   Input Parameters:
1948 + comm - MPI communicator
1949 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1950 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1951 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1952 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1953 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
1954    to control all matrix memory allocation.
1955 
1956   Output Parameter:
1957 . A - the matrix
1958 
1959   Level: intermediate
1960 
1961   Notes:
1962   The dense format is fully compatible with standard Fortran
1963   storage by columns.
1964 
1965   Although local portions of the matrix are stored in column-major
1966   order, the matrix is partitioned across MPI ranks by row.
1967 
1968   The data input variable is intended primarily for Fortran programmers
1969   who wish to allocate their own matrix memory space.  Most users should
1970   set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).
1971 
1972   The user MUST specify either the local or global matrix dimensions
1973   (possibly both).
1974 
1975 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1976 @*/
1977 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
1978 {
1979   PetscFunctionBegin;
1980   PetscCall(MatCreate(comm, A));
1981   PetscCall(MatSetSizes(*A, m, n, M, N));
1982   PetscCall(MatSetType(*A, MATDENSE));
1983   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1984   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1985   PetscFunctionReturn(PETSC_SUCCESS);
1986 }
1987 
1988 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1989 {
1990   Mat           mat;
1991   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1992 
1993   PetscFunctionBegin;
1994   *newmat = NULL;
1995   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1996   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1997   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1998   a = (Mat_MPIDense *)mat->data;
1999 
2000   mat->factortype   = A->factortype;
2001   mat->assembled    = PETSC_TRUE;
2002   mat->preallocated = PETSC_TRUE;
2003 
2004   mat->insertmode = NOT_SET_VALUES;
2005   a->donotstash   = oldmat->donotstash;
2006 
2007   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2008   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2009 
2010   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2011 
2012   *newmat = mat;
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
/* Load a parallel dense matrix from a viewer; binary and (if available) HDF5
   viewers are supported, anything else errors. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2041 
2042 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2043 {
2044   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2045   Mat           a, b;
2046 
2047   PetscFunctionBegin;
2048   a = matA->A;
2049   b = matB->A;
2050   PetscCall(MatEqual(a, b, flag));
2051   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2056 {
2057   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2058 
2059   PetscFunctionBegin;
2060   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2061   PetscCall(MatDestroy(&atb->atb));
2062   PetscCall(PetscFree(atb));
2063   PetscFunctionReturn(PETSC_SUCCESS);
2064 }
2065 
2066 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2067 {
2068   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2069 
2070   PetscFunctionBegin;
2071   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2072   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2073   PetscCall(PetscFree(abt));
2074   PetscFunctionReturn(PETSC_SUCCESS);
2075 }
2076 
/*
  MatTransposeMatMultNumeric_MPIDense_MPIDense - Numeric phase of C = A^T * B.

  Each rank first forms the full-sized local product aseq^T * bseq; the per-rank
  contributions are then summed and distributed according to C's row ownership,
  either via PetscSF reduction (when all rows of C live on rank 0) or via
  MPI_Reduce_scatter (general ownership). Requires the symbolic phase to have
  stored a Mat_TransMatMultDense context in C->product->data.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL; /* temporary copy when atb's lda is not contiguous */
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL; /* temporary copy when c's lda is not contiguous */
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    if (rank == 0) {
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      /* rank 0 is the reduction root: zero it so the SF reduce accumulates cleanly */
      PetscCall(MatZeroEntries(c_local));
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
     * B->cmap->N: use the a->Mvctx to use the best reduction method */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      /* reduce whole matrix rows at once: one contiguous unit per row of C */
      PetscMPIInt mpi_N;

      PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
      PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    /* if we reduced into a temporary (lda mismatch), copy the result back into C's local block */
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* arrange atbarray into sendbuf: for each destination rank, pack its owned rows column by column */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2181 
2182 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2183 {
2184   MPI_Comm               comm;
2185   PetscMPIInt            size;
2186   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2187   Mat_TransMatMultDense *atb;
2188   PetscBool              cisdense = PETSC_FALSE;
2189   const PetscInt        *ranges;
2190 
2191   PetscFunctionBegin;
2192   MatCheckProduct(C, 4);
2193   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2194   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2195   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2196     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2197   }
2198 
2199   /* create matrix product C */
2200   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2201 #if defined(PETSC_HAVE_CUDA)
2202   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2203 #endif
2204 #if defined(PETSC_HAVE_HIP)
2205   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2206 #endif
2207   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2208   PetscCall(MatSetUp(C));
2209 
2210   /* create data structure for reuse C */
2211   PetscCallMPI(MPI_Comm_size(comm, &size));
2212   PetscCall(PetscNew(&atb));
2213   cM = C->rmap->N;
2214   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2215   PetscCall(MatGetOwnershipRanges(C, &ranges));
2216   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2217   C->product->data    = atb;
2218   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2219   PetscFunctionReturn(PETSC_SUCCESS);
2220 }
2221 
/*
  MatMatTransposeMultSymbolic_MPIDense_MPIDense - Symbolic phase of C = A * B^T.

  Sets C's sizes/type and allocates the Mat_MatTransMultDense reuse context for
  the algorithm chosen on the product ("cyclic" ring pipeline or the default
  "allgatherv"): the "cyclic" path needs two ping-pong buffers sized for the
  largest block of B rows on any rank; the "allgatherv" path needs a local send
  buffer, a global receive buffer, and per-rank counts/displacements.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg; /* 0 = "allgatherv", 1 = "cyclic" */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag so the cyclic algorithm's messages cannot collide with other traffic on the comm */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* both buffers must hold the largest per-rank slice of B^T circulating around the ring */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local transposed slice of B, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2273 
/*
  MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic - Numeric phase of C = A * B^T
  using a ring pipeline.

  The local (transposed) slice of B circulates around the ranks in `size` steps;
  at each step every rank multiplies its local A block against the slice it
  currently holds (one GEMM per step), overlapping that computation with the
  nonblocking send/receive of the next slice. The two ping-pong buffers
  allocated by the symbolic phase alternate as send and receive targets.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B block is contiguous: send it in place without copying */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact the local B block (strip the lda padding) into the first ping-pong buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* previous step's communication has completed, so both buffers may be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by the slice's origin rank */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2357 
2358 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2359 {
2360   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2361   Mat_MatTransMultDense *abt;
2362   MPI_Comm               comm;
2363   PetscMPIInt            size, ibn;
2364   PetscScalar           *cv, *sendbuf, *recvbuf;
2365   const PetscScalar     *av, *bv;
2366   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2367   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2368   PetscBLASInt           cm, cn, ck, alda, clda;
2369 
2370   PetscFunctionBegin;
2371   MatCheckProduct(C, 3);
2372   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2373   abt = (Mat_MatTransMultDense *)C->product->data;
2374   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2375   PetscCallMPI(MPI_Comm_size(comm, &size));
2376   PetscCall(MatDenseGetArrayRead(a->A, &av));
2377   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2378   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2379   PetscCall(MatDenseGetLDA(a->A, &i));
2380   PetscCall(PetscBLASIntCast(i, &alda));
2381   PetscCall(MatDenseGetLDA(b->A, &blda));
2382   PetscCall(MatDenseGetLDA(c->A, &i));
2383   PetscCall(PetscBLASIntCast(i, &clda));
2384   /* copy transpose of B into buf[0] */
2385   bn      = B->rmap->n;
2386   sendbuf = abt->buf[0];
2387   recvbuf = abt->buf[1];
2388   for (k = 0, j = 0; j < bn; j++) {
2389     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2390   }
2391   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2392   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2393   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2394   PetscCall(PetscBLASIntCast(cK, &ck));
2395   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2396   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2397   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2398   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2399   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2400   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2401   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2402   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2403   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2404   PetscFunctionReturn(PETSC_SUCCESS);
2405 }
2406 
2407 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2408 {
2409   Mat_MatTransMultDense *abt;
2410 
2411   PetscFunctionBegin;
2412   MatCheckProduct(C, 3);
2413   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2414   abt = (Mat_MatTransMultDense *)C->product->data;
2415   switch (abt->alg) {
2416   case 1:
2417     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2418     break;
2419   default:
2420     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2421     break;
2422   }
2423   PetscFunctionReturn(PETSC_SUCCESS);
2424 }
2425 
2426 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2427 {
2428   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2429 
2430   PetscFunctionBegin;
2431   PetscCall(MatDestroy(&ab->Ce));
2432   PetscCall(MatDestroy(&ab->Ae));
2433   PetscCall(MatDestroy(&ab->Be));
2434   PetscCall(PetscFree(ab));
2435   PetscFunctionReturn(PETSC_SUCCESS);
2436 }
2437 
/*
  MatMatMultNumeric_MPIDense_MPIDense - Numeric phase of C = A * B.

  Two paths, selected by the symbolic phase via the product context:
  - "elemental": A and B are converted to Elemental matrices and the product is
    done there (only when Ae and Ce were created symbolically);
  - "petsc": every rank gathers all of B into the sequential work matrix Be via
    the PetscSF in a->Mvctx, then computes its block row of C with a local
    sequential mat-mat product.
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
  Mat_MPIDense     *b   = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    MPI_Comm           comm;
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;
    const PetscInt    *ranges;
    PetscMPIInt        size;

    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    comm = PetscObjectComm((PetscObject)B);
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      PetscInt           b_lda, be_lda;
      Mat                b_local  = b->A;
      Mat                b_alloc  = NULL; /* contiguous copy of b when its lda has padding */
      Mat                be_local = ab->Be;
      Mat                be_alloc = NULL; /* contiguous stand-in for Be when its lda has padding */
      PetscMemType       b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype       vector_type;
      PetscScalar       *be_array = NULL;
      PetscMPIInt        rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      if (rank == 0) {
        /* rank 0 is the broadcast root, so only its source data must be contiguous */
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        /* broadcast whole matrix rows at once: one contiguous unit per row of B */
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      /* if we broadcast into a temporary (lda mismatch), copy the result into the real Be */
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      /* general ownership: broadcast B into Be one column at a time through the SF */
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    /* local block row of C = (local A) * (gathered B) */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2530 
2531 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2532 {
2533   Mat_Product      *product = C->product;
2534   PetscInt          alg;
2535   Mat_MatMultDense *ab;
2536   PetscBool         flg;
2537 
2538   PetscFunctionBegin;
2539   MatCheckProduct(C, 4);
2540   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2541   /* check local size of A and B */
2542   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2543              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2544 
2545   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2546   alg = flg ? 0 : 1;
2547 
2548   /* setup C */
2549   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2550   PetscCall(MatSetType(C, MATMPIDENSE));
2551   PetscCall(MatSetUp(C));
2552 
2553   /* create data structure for reuse Cdense */
2554   PetscCall(PetscNew(&ab));
2555 
2556   switch (alg) {
2557   case 1: /* alg: "elemental" */
2558 #if PetscDefined(HAVE_ELEMENTAL)
2559     /* create elemental matrices Ae and Be */
2560     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2561     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2562     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2563     PetscCall(MatSetUp(ab->Ae));
2564     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2565 
2566     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2567     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2568     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2569     PetscCall(MatSetUp(ab->Be));
2570     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2571 
2572     /* compute symbolic Ce = Ae*Be */
2573     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2574     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2575 #else
2576     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2577 #endif
2578     break;
2579   default: /* alg: "petsc" */
2580     ab->Ae = NULL;
2581     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2582     ab->Ce = NULL;
2583     break;
2584   }
2585 
2586   C->product->data       = ab;
2587   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2588   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2589   PetscFunctionReturn(PETSC_SUCCESS);
2590 }
2591 
2592 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2593 {
2594   Mat_Product *product     = C->product;
2595   const char  *algTypes[2] = {"petsc", "elemental"};
2596   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2597   PetscBool    flg = PETSC_FALSE;
2598 
2599   PetscFunctionBegin;
2600   /* Set default algorithm */
2601   alg = 0; /* default is PETSc */
2602   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2603   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2604 
2605   /* Get runtime option */
2606   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2607   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2608   PetscOptionsEnd();
2609   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2610 
2611   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2612   C->ops->productsymbolic = MatProductSymbolic_AB;
2613   PetscFunctionReturn(PETSC_SUCCESS);
2614 }
2615 
2616 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2617 {
2618   Mat_Product *product = C->product;
2619   Mat          A = product->A, B = product->B;
2620 
2621   PetscFunctionBegin;
2622   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2623              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2624   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2625   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2626   PetscFunctionReturn(PETSC_SUCCESS);
2627 }
2628 
2629 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2630 {
2631   Mat_Product *product     = C->product;
2632   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2633   PetscInt     alg, nalg = 2;
2634   PetscBool    flg = PETSC_FALSE;
2635 
2636   PetscFunctionBegin;
2637   /* Set default algorithm */
2638   alg = 0; /* default is allgatherv */
2639   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2640   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2641 
2642   /* Get runtime option */
2643   if (product->api_user) {
2644     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2645     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2646     PetscOptionsEnd();
2647   } else {
2648     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2649     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2650     PetscOptionsEnd();
2651   }
2652   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2653 
2654   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2655   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
2658 
2659 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2660 {
2661   Mat_Product *product = C->product;
2662 
2663   PetscFunctionBegin;
2664   switch (product->type) {
2665   case MATPRODUCT_AB:
2666     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2667     break;
2668   case MATPRODUCT_AtB:
2669     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2670     break;
2671   case MATPRODUCT_ABt:
2672     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2673     break;
2674   default:
2675     break;
2676   }
2677   PetscFunctionReturn(PETSC_SUCCESS);
2678 }
2679