xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 3f7878b3ed00daeee6ae8d2839a0d41a4d3e79ca)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 #include <petscdevice.h>
12 #include <petsc/private/deviceimpl.h>
13 
14 /*@
15   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
16   matrix that represents the operator. For sequential matrices it returns itself.
17 
18   Input Parameter:
19 . A - the sequential or MPI `MATDENSE` matrix
20 
21   Output Parameter:
22 . B - the inner matrix
23 
24   Level: intermediate
25 
26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
27 @*/
28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
29 {
30   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
31   PetscBool     flg;
32 
33   PetscFunctionBegin;
34   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
35   PetscAssertPointer(B, 2);
36   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
37   if (flg) *B = mat->A;
38   else {
39     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
40     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
41     *B = A;
42   }
43   PetscFunctionReturn(PETSC_SUCCESS);
44 }
45 
46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
47 {
48   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
49   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
50 
51   PetscFunctionBegin;
52   PetscCall(MatCopy(Amat->A, Bmat->A, s));
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
57 {
58   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
59   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
60   PetscScalar  *v;
61 
62   PetscFunctionBegin;
63   PetscCall(MatDenseGetArray(mat->A, &v));
64   PetscCall(MatDenseGetLDA(mat->A, &lda));
65   rend2 = PetscMin(rend, A->cmap->N);
66   if (rend2 > rstart) {
67     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
68     PetscCall(PetscLogFlops(rend2 - rstart));
69   }
70   PetscCall(MatDenseRestoreArray(mat->A, &v));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
75 {
76   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
77   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
78 
79   PetscFunctionBegin;
80   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
81   lrow = row - rstart;
82   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
87 {
88   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
89   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
90 
91   PetscFunctionBegin;
92   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
93   lrow = row - rstart;
94   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
/* Return the square sequential block of A formed by the locally owned rows and
   the columns with the same global indices. The block aliases A's storage (no
   copy) and is cached on A under the key "DiagonalBlock" so it is built once. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    /* B aliases A's data starting at column rstart of the local array.
       NOTE(review): the offset m*rstart assumes the inner matrix's lda == m;
       confirm this holds when a nonstandard LDA or placed array is in use. */
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* A now holds the only persistent reference; drop ours so the cache owns B */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
132 
/* Insert/add a logically dense block of values. Locally owned rows go straight
   into the inner sequential matrix; off-process rows are stashed and shipped to
   their owners during MatAssemblyBegin/End. Negative row indices are skipped. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the block starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* v is column-major: entry (i,j) of the block is v[i + j*m]; set one at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it and mark the matrix as needing assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
165 
166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
167 {
168   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
169   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
170 
171   PetscFunctionBegin;
172   for (i = 0; i < m; i++) {
173     if (idxm[i] < 0) continue; /* negative row */
174     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
175     if (idxm[i] >= rstart && idxm[i] < rend) {
176       row = idxm[i] - rstart;
177       for (j = 0; j < n; j++) {
178         if (idxn[j] < 0) continue; /* negative column */
179         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
180         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
181       }
182     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
188 {
189   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
190 
191   PetscFunctionBegin;
192   PetscCall(MatDenseGetLDA(a->A, lda));
193   PetscFunctionReturn(PETSC_SUCCESS);
194 }
195 
/* Set the leading dimension of the local storage. If the inner sequential
   matrix does not exist yet it is created first (with a device type when A is
   a CUDA/HIP MPI dense matrix), since the LDA lives on the inner matrix. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* the inner matrix holds all local rows and every global column */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
222 
223 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
224 {
225   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
226 
227   PetscFunctionBegin;
228   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
229   PetscCall(MatDenseGetArray(a->A, array));
230   PetscFunctionReturn(PETSC_SUCCESS);
231 }
232 
233 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
234 {
235   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
236 
237   PetscFunctionBegin;
238   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
239   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
243 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
244 {
245   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
246 
247   PetscFunctionBegin;
248   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
249   PetscCall(MatDenseGetArrayWrite(a->A, array));
250   PetscFunctionReturn(PETSC_SUCCESS);
251 }
252 
253 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
254 {
255   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
256 
257   PetscFunctionBegin;
258   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
259   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
260   PetscCall(MatDensePlaceArray(a->A, array));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
265 {
266   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
267 
268   PetscFunctionBegin;
269   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
270   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
271   PetscCall(MatDenseResetArray(a->A));
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
276 {
277   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
278 
279   PetscFunctionBegin;
280   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
281   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
282   PetscCall(MatDenseReplaceArray(a->A, array));
283   PetscFunctionReturn(PETSC_SUCCESS);
284 }
285 
/* Extract the submatrix A(isrow, iscol). The column index set is gathered onto
   every process, so each process copies its selected local rows for all
   selected (global) columns. Rows must keep the original partitioning; no
   parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): bv is advanced contiguously, nrows entries per column, which
     assumes the new inner matrix's lda equals nrows — confirm for reused B. */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at column icol[i] of this process's local rows (column-major) */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
352 
353 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
354 {
355   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
356 
357   PetscFunctionBegin;
358   PetscCall(MatDenseRestoreArray(a->A, array));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
363 {
364   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
365 
366   PetscFunctionBegin;
367   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 
371 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
372 {
373   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
374 
375   PetscFunctionBegin;
376   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
/* Begin assembly: start scattering stashed off-process entries to the ranks
   that own those rows. Completed in MatAssemblyEnd_MPIDense. */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* nothing was stashed, so no communication is needed */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
393 
/* Finish assembly: drain the incoming stash messages, inserting the received
   (global-row-indexed) values via MatSetValues_MPIDense, then assemble the
   inner sequential matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 once every pending message has been consumed */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
427 
428 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
429 {
430   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
431 
432   PetscFunctionBegin;
433   PetscCall(MatZeroEntries(l->A));
434   PetscFunctionReturn(PETSC_SUCCESS);
435 }
436 
/* Zero the given global rows, optionally placing `diag` on their diagonal and
   fixing the right-hand side so that b = diag*x on those rows. Rows are first
   mapped to local indices; rows owned by other ranks are handled there. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows first (with zero diagonal); the diagonal is restored below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* MatDiagonalSet is collective, so every rank takes this path together */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
468 
469 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
471 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
472 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
473 
/* yy = A[:, c_start:c_end) * xx: broadcast the needed entries of xx into the
   local work vector lvec, then run the sequential column-range kernel on the
   locally owned rows. */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication pattern */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast lets device-resident arrays communicate without host staging */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
492 
/* yy = A*xx: broadcast xx into the local work vector lvec, then multiply with
   the inner sequential matrix, which holds all columns of the local rows. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication pattern */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast avoids host staging for device arrays */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
511 
/* zz = yy + A[:, c_start:c_end) * xx: broadcast xx into lvec, then run the
   sequential column-range multiply-add kernel on the local rows. */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication pattern */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
530 
/* zz = yy + A*xx: broadcast xx into the local work vector lvec, then apply the
   inner sequential matrix's multiply-add. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication pattern */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
/* Hermitian-transpose multiply restricted to columns [c_start, c_end):
   zero the relevant owned segment of yy, compute the local contribution into
   lvec, then sum all contributions into yy with an SF reduction.
   NOTE(review): entries of yy outside [c_start, c_end) are not zeroed here —
   presumably only the range is defined on output; confirm against callers. */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
  /* lvec accumulates local contributions; entries outside the range must be zero */
  PetscCall(VecZeroEntries(a->lvec));
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  /* intersect the requested column range with this process's owned rows of yy */
  c_start_local = PetscMax(c_start, r_start);
  c_end_local   = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    } else {
      /* register the device buffer so PetscDeviceArrayZero can operate on it */
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  /* sum the per-process contributions into the owners of yy */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
582 
/* Shared kernel for yy = A^T*xx (herm == PETSC_FALSE) or yy = A^H*xx
   (herm == PETSC_TRUE): compute the local contribution into lvec, then sum
   contributions from all processes into yy with an SF reduction. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
  PetscCall(VecSet(yy, 0.0)); /* reduction below adds into yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
603 
/* zz = yy + (column-range Hermitian-transpose product): copy yy into zz, put
   the local contribution in lvec (zeroed first so out-of-range entries add
   nothing), then sum contributions into zz with an SF reduction. */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
  PetscCall(VecCopy(yy, zz));
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
624 
/* Shared kernel for zz = yy + A^T*xx (herm == PETSC_FALSE) or zz = yy + A^H*xx
   (herm == PETSC_TRUE): copy yy into zz, compute the local contribution into
   lvec, then sum contributions from all processes into zz. */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
645 
/* yy = A^T*xx: thin wrapper over the shared (Hermitian-)transpose kernel */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
652 
/* zz = yy + A^T*xx: thin wrapper over the shared multiply-add kernel */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
659 
/* yy = A^H*xx: thin wrapper over the shared (Hermitian-)transpose kernel */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
666 
/* zz = yy + A^H*xx: thin wrapper over the shared multiply-add kernel */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
673 
674 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
675 {
676   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
677   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
678   PetscScalar       *x;
679   const PetscScalar *av;
680 
681   PetscFunctionBegin;
682   PetscCall(VecGetArray(v, &x));
683   PetscCall(VecGetSize(v, &ng));
684   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
685   PetscCall(VecGetLocalSize(v, &nl));
686   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
687   radd = A->rmap->rstart * m;
688   PetscCall(MatDenseGetArrayRead(a->A, &av));
689   PetscCall(MatDenseGetLDA(a->A, &lda));
690   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
691   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
692   if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
693   PetscCall(VecRestoreArray(v, &x));
694   PetscFunctionReturn(PETSC_SUCCESS);
695 }
696 
/*
  MatDestroy_MPIDense - Releases all resources owned by an MPI dense matrix.

  Refuses to destroy while a column vector or submatrix obtained from this
  matrix is still checked out (vecinuse/matinuse), then frees the local
  sequential block, communication objects, and cached column vector/matrix,
  and finally unregisters every composed method so a later MatSetType() on
  the same header starts from a clean slate.
*/
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* outstanding accessors must be returned before destruction */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* owned sub-objects: local block, multiply work vector, star forest, cached column vec/mat */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* unregister all composed methods installed by MatCreate_MPIDense */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA back-end registrations (conversions, products, device array access) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP back-end registrations, mirroring the CUDA set above */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* drop the composed diagonal-block reference as well */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
795 
796 #include <petscdraw.h>
797 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
798 {
799   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
800   PetscMPIInt       rank;
801   PetscViewerType   vtype;
802   PetscBool         iascii, isdraw;
803   PetscViewer       sviewer;
804   PetscViewerFormat format;
805 
806   PetscFunctionBegin;
807   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
808   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
809   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
810   if (iascii) {
811     PetscCall(PetscViewerGetType(viewer, &vtype));
812     PetscCall(PetscViewerGetFormat(viewer, &format));
813     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
814       MatInfo info;
815       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
816       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
817       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
818                                                    (PetscInt)info.memory));
819       PetscCall(PetscViewerFlush(viewer));
820       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
821       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
822       PetscFunctionReturn(PETSC_SUCCESS);
823     } else if (format == PETSC_VIEWER_ASCII_INFO) {
824       PetscFunctionReturn(PETSC_SUCCESS);
825     }
826   } else if (isdraw) {
827     PetscDraw draw;
828     PetscBool isnull;
829 
830     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
831     PetscCall(PetscDrawIsNull(draw, &isnull));
832     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
833   }
834 
835   {
836     /* assemble the entire matrix onto first processor. */
837     Mat          A;
838     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
839     PetscInt    *cols;
840     PetscScalar *vals;
841 
842     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
843     if (rank == 0) {
844       PetscCall(MatSetSizes(A, M, N, M, N));
845     } else {
846       PetscCall(MatSetSizes(A, 0, 0, M, N));
847     }
848     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
849     PetscCall(MatSetType(A, MATMPIDENSE));
850     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
851 
852     /* Copy the matrix ... This isn't the most efficient means,
853        but it's quick for now */
854     A->insertmode = INSERT_VALUES;
855 
856     row = mat->rmap->rstart;
857     m   = mdn->A->rmap->n;
858     for (i = 0; i < m; i++) {
859       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
860       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
861       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
862       row++;
863     }
864 
865     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
866     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
867     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
868     if (rank == 0) {
869       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
870       PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
871     }
872     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
873     PetscCall(MatDestroy(&A));
874   }
875   PetscFunctionReturn(PETSC_SUCCESS);
876 }
877 
878 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
879 {
880   PetscBool iascii, isbinary, isdraw, issocket;
881 
882   PetscFunctionBegin;
883   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
884   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
885   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
886   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
887 
888   if (iascii || issocket || isdraw) {
889     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
890   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
891   PetscFunctionReturn(PETSC_SUCCESS);
892 }
893 
894 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
895 {
896   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
897   Mat            mdn = mat->A;
898   PetscLogDouble isend[5], irecv[5];
899 
900   PetscFunctionBegin;
901   info->block_size = 1.0;
902 
903   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
904 
905   isend[0] = info->nz_used;
906   isend[1] = info->nz_allocated;
907   isend[2] = info->nz_unneeded;
908   isend[3] = info->memory;
909   isend[4] = info->mallocs;
910   if (flag == MAT_LOCAL) {
911     info->nz_used      = isend[0];
912     info->nz_allocated = isend[1];
913     info->nz_unneeded  = isend[2];
914     info->memory       = isend[3];
915     info->mallocs      = isend[4];
916   } else if (flag == MAT_GLOBAL_MAX) {
917     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
918 
919     info->nz_used      = irecv[0];
920     info->nz_allocated = irecv[1];
921     info->nz_unneeded  = irecv[2];
922     info->memory       = irecv[3];
923     info->mallocs      = irecv[4];
924   } else if (flag == MAT_GLOBAL_SUM) {
925     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
926 
927     info->nz_used      = irecv[0];
928     info->nz_allocated = irecv[1];
929     info->nz_unneeded  = irecv[2];
930     info->memory       = irecv[3];
931     info->mallocs      = irecv[4];
932   }
933   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
934   info->fill_ratio_needed = 0;
935   info->factor_mallocs    = 0;
936   PetscFunctionReturn(PETSC_SUCCESS);
937 }
938 
939 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
940 {
941   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
942 
943   PetscFunctionBegin;
944   switch (op) {
945   case MAT_NEW_NONZERO_LOCATIONS:
946   case MAT_NEW_NONZERO_LOCATION_ERR:
947   case MAT_NEW_NONZERO_ALLOCATION_ERR:
948     MatCheckPreallocated(A, 1);
949     PetscCall(MatSetOption(a->A, op, flg));
950     break;
951   case MAT_ROW_ORIENTED:
952     MatCheckPreallocated(A, 1);
953     a->roworiented = flg;
954     PetscCall(MatSetOption(a->A, op, flg));
955     break;
956   case MAT_IGNORE_OFF_PROC_ENTRIES:
957     a->donotstash = flg;
958     break;
959   case MAT_SYMMETRIC:
960   case MAT_STRUCTURALLY_SYMMETRIC:
961   case MAT_HERMITIAN:
962   case MAT_SYMMETRY_ETERNAL:
963   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
964   case MAT_SPD:
965   case MAT_SPD_ETERNAL:
966     /* if the diagonal matrix is square it inherits some of the properties above */
967     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
968     break;
969   default:
970     break;
971   }
972   PetscFunctionReturn(PETSC_SUCCESS);
973 }
974 
/*
  MatDiagonalScale_MPIDense - Computes A = diag(ll) * A * diag(rr) in place.

  ll is distributed like A's rows, so left scaling is purely local: row i of
  the local block is multiplied by l[i]. rr is distributed like A's columns,
  but the local block stores entries for all global columns, so the right
  scaling first broadcasts rr into the local work vector lvec through the
  Mvctx star forest before scaling each local column.
  Either vector may be NULL to skip that side of the scaling.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i: successive entries are lda apart (column-major) */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the column-communication star forest on first use */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* gather the full (global-length) scaling vector into lvec */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1023 
1024 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
1025 {
1026   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1027   PetscInt           i, j;
1028   PetscMPIInt        size;
1029   PetscReal          sum = 0.0;
1030   const PetscScalar *av, *v;
1031 
1032   PetscFunctionBegin;
1033   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1034   v = av;
1035   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1036   if (size == 1) {
1037     PetscCall(MatNorm(mdn->A, type, nrm));
1038   } else {
1039     if (type == NORM_FROBENIUS) {
1040       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1041         sum += PetscRealPart(PetscConj(*v) * (*v));
1042         v++;
1043       }
1044       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1045       *nrm = PetscSqrtReal(*nrm);
1046       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1047     } else if (type == NORM_1) {
1048       PetscReal *tmp;
1049 
1050       PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1051       *nrm = 0.0;
1052       v    = av;
1053       for (j = 0; j < mdn->A->cmap->n; j++) {
1054         for (i = 0; i < mdn->A->rmap->n; i++) {
1055           tmp[j] += PetscAbsScalar(*v);
1056           v++;
1057         }
1058       }
1059       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1060       for (j = 0; j < A->cmap->N; j++) {
1061         if (tmp[j] > *nrm) *nrm = tmp[j];
1062       }
1063       PetscCall(PetscFree(tmp));
1064       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1065     } else if (type == NORM_INFINITY) { /* max row norm */
1066       PetscCall(MatNorm(mdn->A, type, nrm));
1067       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1068     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1069   }
1070   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1071   PetscFunctionReturn(PETSC_SUCCESS);
1072 }
1073 
/*
  MatTranspose_MPIDense - Forms the transpose of an MPI dense matrix.

  The local block a->A has dimensions (A->rmap->n) x (A->cmap->N), i.e. it
  holds ALL global columns of the owned rows (see the preallocation routine).
  Hence local column index j of a->A is also the global column index of A,
  which in turn is the global ROW index of the transpose B — so &j below is
  a valid global row index for MatSetValues. rwork holds this rank's global
  row indices of A, which become B's global column indices.
  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX
  (the last merges the new matrix back into A's header).
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B is N x M, with this rank owning A's column count as rows */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* global row indices of A owned here = global column indices of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of the local block (contiguous, length m) becomes row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's, keeping A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1112 
1113 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1114 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1115 
1116 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1117 {
1118   PetscFunctionBegin;
1119   PetscCall(PetscLayoutSetUp(A->rmap));
1120   PetscCall(PetscLayoutSetUp(A->cmap));
1121   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1126 {
1127   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1128 
1129   PetscFunctionBegin;
1130   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1135 {
1136   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1137 
1138   PetscFunctionBegin;
1139   PetscCall(MatConjugate(a->A));
1140   PetscFunctionReturn(PETSC_SUCCESS);
1141 }
1142 
1143 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1144 {
1145   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1146 
1147   PetscFunctionBegin;
1148   PetscCall(MatRealPart(a->A));
1149   PetscFunctionReturn(PETSC_SUCCESS);
1150 }
1151 
1152 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1153 {
1154   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1155 
1156   PetscFunctionBegin;
1157   PetscCall(MatImaginaryPart(a->A));
1158   PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160 
1161 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1162 {
1163   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1167   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1168   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1173 
1174 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1175 {
1176   PetscInt      i, m, n;
1177   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatGetSize(A, &m, &n));
1181   if (type == REDUCTION_MEAN_REALPART) {
1182     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
1183   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1184     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
1185   } else {
1186     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
1187   }
1188   if (type == NORM_2) {
1189     for (i = 0; i < n; i++) reductions[i] *= reductions[i];
1190   }
1191   if (type == NORM_INFINITY) {
1192     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1193   } else {
1194     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1195   }
1196   if (type == NORM_2) {
1197     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1198   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1199     for (i = 0; i < n; i++) reductions[i] /= m;
1200   }
1201   PetscFunctionReturn(PETSC_SUCCESS);
1202 }
1203 
1204 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1205 {
1206   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(MatSetRandom(d->A, rctx));
1210 #if defined(PETSC_HAVE_DEVICE)
1211   x->offloadmask = d->A->offloadmask;
1212 #endif
1213   PetscFunctionReturn(PETSC_SUCCESS);
1214 }
1215 
/*
  MatMissingDiagonal_MPIDense - A dense matrix stores every entry explicitly,
  so a diagonal entry can never be structurally missing; always reports
  PETSC_FALSE. `d` is intentionally left untouched (NOTE(review): callers
  presumably only read it when *missing is true — confirm against the
  MatMissingDiagonal contract).
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1222 
1223 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1224 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1225 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1226 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1227 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1228 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1229 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1230 
/*
  Function table for MATMPIDENSE. The initializer is POSITIONAL: each slot
  corresponds to a fixed index in struct _MatOps (the /\* nn *\/ markers track
  the index), so entries must never be reordered; NULL marks an operation the
  type does not implement.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIDense,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       /* 89*/ NULL,
                                       MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       /*104*/ MatMissingDiagonal_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*120*/ NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL};
1375 
1376 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1377 {
1378   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1379   MatType       mtype = MATSEQDENSE;
1380 
1381   PetscFunctionBegin;
1382   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1383   PetscCall(PetscLayoutSetUp(mat->rmap));
1384   PetscCall(PetscLayoutSetUp(mat->cmap));
1385   if (!a->A) {
1386     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1387     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1388   }
1389 #if defined(PETSC_HAVE_CUDA)
1390   PetscBool iscuda;
1391   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1392   if (iscuda) mtype = MATSEQDENSECUDA;
1393 #endif
1394 #if defined(PETSC_HAVE_HIP)
1395   PetscBool iship;
1396   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1397   if (iship) mtype = MATSEQDENSEHIP;
1398 #endif
1399   PetscCall(MatSetType(a->A, mtype));
1400   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1401 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1402   mat->offloadmask = a->A->offloadmask;
1403 #endif
1404   mat->preallocated = PETSC_TRUE;
1405   mat->assembled    = PETSC_TRUE;
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1410 {
1411   Mat B, C;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1415   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1416   PetscCall(MatDestroy(&C));
1417   if (reuse == MAT_REUSE_MATRIX) {
1418     C = *newmat;
1419   } else C = NULL;
1420   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1421   PetscCall(MatDestroy(&B));
1422   if (reuse == MAT_INPLACE_MATRIX) {
1423     PetscCall(MatHeaderReplace(A, &C));
1424   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1425   PetscFunctionReturn(PETSC_SUCCESS);
1426 }
1427 
1428 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1429 {
1430   Mat B, C;
1431 
1432   PetscFunctionBegin;
1433   PetscCall(MatDenseGetLocalMatrix(A, &C));
1434   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1435   if (reuse == MAT_REUSE_MATRIX) {
1436     C = *newmat;
1437   } else C = NULL;
1438   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1439   PetscCall(MatDestroy(&B));
1440   if (reuse == MAT_INPLACE_MATRIX) {
1441     PetscCall(MatHeaderReplace(A, &C));
1442   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1443   PetscFunctionReturn(PETSC_SUCCESS);
1444 }
1445 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format.
   For MAT_REUSE_MATRIX the existing Elemental matrix is zeroed and refilled;
   for MAT_INPLACE_MATRIX the header of A is replaced by the new matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* values below are supplied column by column (v is column-major) */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* this rank contributes its owned rows [rstart, rstart+m) for all N global columns */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* leading dimension exceeds local row count: columns are not contiguous, insert one at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1490 
/* Forward MatDenseGetColumn() to the local sequential matrix; the returned
   pointer addresses this rank's rows of global column col. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1499 
/* Release a column pointer obtained with MatDenseGetColumn_MPIDense()
   by forwarding to the local sequential matrix. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1508 
1509 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1510 {
1511   Mat_MPIDense *mat;
1512   PetscInt      m, nloc, N;
1513 
1514   PetscFunctionBegin;
1515   PetscCall(MatGetSize(inmat, &m, &N));
1516   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1517   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1518     PetscInt sum;
1519 
1520     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1521     /* Check sum(n) = N */
1522     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1523     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1524 
1525     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1526     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1527   }
1528 
1529   /* numeric phase */
1530   mat = (Mat_MPIDense *)(*outmat)->data;
1531   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1532   PetscFunctionReturn(PETSC_SUCCESS);
1533 }
1534 
1535 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1536 {
1537   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1538   PetscInt      lda;
1539 
1540   PetscFunctionBegin;
1541   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1542   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1543   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1544   a->vecinuse = col + 1;
1545   PetscCall(MatDenseGetLDA(a->A, &lda));
1546   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1547   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1548   *v = a->cvec;
1549   PetscFunctionReturn(PETSC_SUCCESS);
1550 }
1551 
1552 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1553 {
1554   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1555 
1556   PetscFunctionBegin;
1557   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1558   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1559   VecCheckAssembled(a->cvec);
1560   a->vecinuse = 0;
1561   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1562   PetscCall(VecResetArray(a->cvec));
1563   if (v) *v = NULL;
1564   PetscFunctionReturn(PETSC_SUCCESS);
1565 }
1566 
1567 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1568 {
1569   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1570   PetscInt      lda;
1571 
1572   PetscFunctionBegin;
1573   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1574   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1575   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1576   a->vecinuse = col + 1;
1577   PetscCall(MatDenseGetLDA(a->A, &lda));
1578   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1579   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1580   PetscCall(VecLockReadPush(a->cvec));
1581   *v = a->cvec;
1582   PetscFunctionReturn(PETSC_SUCCESS);
1583 }
1584 
/* Return a column vector obtained with MatDenseGetColumnVecRead_MPIDense():
   restores read access to the local array, pops the read lock, and detaches
   the borrowed array from the cached vector. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0; /* mark the cached column vector as available again */
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1600 
1601 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1602 {
1603   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1604   PetscInt      lda;
1605 
1606   PetscFunctionBegin;
1607   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1608   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1609   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1610   a->vecinuse = col + 1;
1611   PetscCall(MatDenseGetLDA(a->A, &lda));
1612   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1613   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1614   *v = a->cvec;
1615   PetscFunctionReturn(PETSC_SUCCESS);
1616 }
1617 
/* Return a column vector obtained with MatDenseGetColumnVecWrite_MPIDense():
   restores write access to the local array and detaches the borrowed array
   from the cached vector. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0; /* mark the cached column vector as available again */
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1632 
/* Create (or reuse) a parallel dense submatrix view of A spanning global rows
   [rbegin, rend) and global columns [cbegin, cend). The view shares storage
   with A through MatDenseGetSubMatrix() on the local matrix; the view shell is
   cached in a->cmat and must be returned with MatDenseRestoreSubMatrix(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the requested global ranges to this rank's ownership range (local index space) */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    /* first call: build the cached submatrix shell with matching layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row (resp. column) range: simply share A's layout */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    /* remember the ranges so later calls can detect whether the layouts must be rebuilt */
    c             = (Mat_MPIDense *)a->cmat->data;
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* row range changed since the cached shell was built: rebuild the row layout */
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      /* the mat-vec scatter depends on the column layout, so it must be rebuilt too */
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* local rows use local indices; columns of the local matrix are global, hence cbegin/cend */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero marks "in use"; value records the column offset */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1705 
/* Return the submatrix view obtained with MatDenseGetSubMatrix_MPIDense().
   Releases the view of the local matrix and clears the in-use flag; the cached
   a->cmat shell itself is kept for reuse by later calls. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* keep the parallel matrix's offload state in sync with the local matrix */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1724 
1725 /*MC
1726    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1727 
1728    Options Database Key:
1729 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1730 
1731   Level: beginner
1732 
1733 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1734 M*/
/* Constructor for MATMPIDENSE: allocates the per-process data structure,
   installs the operation table, and composes all type-specific methods.
   The inner sequential matrix a->A is created later, during preallocation. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MPIDENSE operation table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix products with (device) AIJ matrices */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  /* column access and column-range multiplies */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1807 
1808 /*MC
1809    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1810 
1811    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1812    and `MATMPIDENSE` otherwise.
1813 
1814    Options Database Key:
1815 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1816 
1817   Level: beginner
1818 
1819 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1820 M*/
1821 
/*@
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data.  Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op for matrix types that have not composed
     MatMPIDenseSetPreallocation_C */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1851 
/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* placing a new array changes the matrix contents */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the user-supplied array is treated as host data */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1885 
/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* contents revert to the original array */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1909 
/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* replacing the array changes the matrix contents */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the user-supplied array is treated as host data */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1942 
/*@
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
   to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* MATDENSE resolves to either SEQDENSE or MPIDENSE depending on comm; both
     preallocations are attempted and the non-matching one is expected to be a
     try-method no-op (the MPI variant demonstrably is) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1988 
1989 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1990 {
1991   Mat           mat;
1992   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1993 
1994   PetscFunctionBegin;
1995   *newmat = NULL;
1996   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1997   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1998   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1999   a = (Mat_MPIDense *)mat->data;
2000 
2001   mat->factortype   = A->factortype;
2002   mat->assembled    = PETSC_TRUE;
2003   mat->preallocated = PETSC_TRUE;
2004 
2005   mat->insertmode = NOT_SET_VALUES;
2006   a->donotstash   = oldmat->donotstash;
2007 
2008   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
2009   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
2010 
2011   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2012 
2013   *newmat = mat;
2014   PetscFunctionReturn(PETSC_SUCCESS);
2015 }
2016 
/* Load a MATMPIDENSE matrix from a viewer. Binary viewers are always
   supported; HDF5 viewers only when PETSc was configured with HDF5. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2042 
2043 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2044 {
2045   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2046   Mat           a, b;
2047 
2048   PetscFunctionBegin;
2049   a = matA->A;
2050   b = matB->A;
2051   PetscCall(MatEqual(a, b, flag));
2052   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2057 {
2058   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2059 
2060   PetscFunctionBegin;
2061   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2062   PetscCall(MatDestroy(&atb->atb));
2063   PetscCall(PetscFree(atb));
2064   PetscFunctionReturn(PETSC_SUCCESS);
2065 }
2066 
2067 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2068 {
2069   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2070 
2071   PetscFunctionBegin;
2072   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2073   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2074   PetscCall(PetscFree(abt));
2075   PetscFunctionReturn(PETSC_SUCCESS);
2076 }
2077 
/*
  Numeric phase of C = A^T * B for MPI dense A, B, C.

  Each rank first forms the full (A->cmap->N x B->cmap->N) partial product
  atb = aseq^T * bseq from its locally owned rows of A and B; the
  row-distributed C is then the sum of these partials across ranks. Two
  communication strategies are used:
    - when rank 0 owns all rows of C, a single PetscSF reduction to rank 0
      (GPU-aware when available);
    - otherwise, the partials are packed per destination rank and combined
      with MPI_Reduce_scatter.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers were allocated once in the symbolic phase and are reused here */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL;
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL;
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /* the SF reduction below assumes contiguous column-major storage, i.e. lda == number of rows */
    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    /* only rank 0 receives data, so only its destination matrix matters */
    if (rank == 0) {
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      PetscCall(MatZeroEntries(c_local));
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
     * B->cmap->N: use the a->Mvctx to use the best reduction method */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    /* treat each matrix row (B->cmap->N scalars) as one MPI datatype element */
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      PetscMPIInt mpi_N;

      PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
      PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    /* memtype-aware SF reduce allows GPU-aware MPI to avoid host staging */
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    /* copy back from the temporary contiguous matrix if one was needed */
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* arrange atbarray into sendbuf: for each destination rank, the rows of the
     partial product it owns, column by column */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2182 
2183 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2184 {
2185   MPI_Comm               comm;
2186   PetscMPIInt            size;
2187   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2188   Mat_TransMatMultDense *atb;
2189   PetscBool              cisdense = PETSC_FALSE;
2190   const PetscInt        *ranges;
2191 
2192   PetscFunctionBegin;
2193   MatCheckProduct(C, 4);
2194   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2195   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2196   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2197     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2198   }
2199 
2200   /* create matrix product C */
2201   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2202 #if defined(PETSC_HAVE_CUDA)
2203   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2204 #endif
2205 #if defined(PETSC_HAVE_HIP)
2206   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2207 #endif
2208   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2209   PetscCall(MatSetUp(C));
2210 
2211   /* create data structure for reuse C */
2212   PetscCallMPI(MPI_Comm_size(comm, &size));
2213   PetscCall(PetscNew(&atb));
2214   cM = C->rmap->N;
2215   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2216   PetscCall(MatGetOwnershipRanges(C, &ranges));
2217   for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2218   C->product->data    = atb;
2219   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2220   PetscFunctionReturn(PETSC_SUCCESS);
2221 }
2222 
/*
  Symbolic phase of C = A * B^T for MPI dense matrices: checks that A and B
  have the same local column dimension, sets up C, and allocates the
  communication buffers for the algorithm selected at set-from-options time
  ("cyclic" ring pipeline or "allgatherv").
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = allgatherv, alg 1 = cyclic (any non-"allgatherv" name) */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic algorithm's messages separate from other traffic on the communicator */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffers sized for the largest row block of B owned by any rank */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the transposed local block of B, buf[1] the gathered global transpose */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2274 
/*
  Numeric phase of C = A * B^T using a ring pipeline: each rank's block of B
  circulates around the ranks, and on every step each rank multiplies its
  local rows of A against the block currently in hand, filling the columns of
  C owned by that block's origin rank. Nonblocking send/recv overlap the next
  transfer with the current GEMM.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local arrays, cast for BLAS */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if the local B block is already contiguous, send it directly; otherwise pack it */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: pass blocks "down" one rank, receive from the rank "above" */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2358 
2359 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2360 {
2361   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2362   Mat_MatTransMultDense *abt;
2363   MPI_Comm               comm;
2364   PetscMPIInt            size, ibn;
2365   PetscScalar           *cv, *sendbuf, *recvbuf;
2366   const PetscScalar     *av, *bv;
2367   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2368   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2369   PetscBLASInt           cm, cn, ck, alda, clda;
2370 
2371   PetscFunctionBegin;
2372   MatCheckProduct(C, 3);
2373   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2374   abt = (Mat_MatTransMultDense *)C->product->data;
2375   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2376   PetscCallMPI(MPI_Comm_size(comm, &size));
2377   PetscCall(MatDenseGetArrayRead(a->A, &av));
2378   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2379   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2380   PetscCall(MatDenseGetLDA(a->A, &i));
2381   PetscCall(PetscBLASIntCast(i, &alda));
2382   PetscCall(MatDenseGetLDA(b->A, &blda));
2383   PetscCall(MatDenseGetLDA(c->A, &i));
2384   PetscCall(PetscBLASIntCast(i, &clda));
2385   /* copy transpose of B into buf[0] */
2386   bn      = B->rmap->n;
2387   sendbuf = abt->buf[0];
2388   recvbuf = abt->buf[1];
2389   for (k = 0, j = 0; j < bn; j++) {
2390     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2391   }
2392   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2393   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2394   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2395   PetscCall(PetscBLASIntCast(cK, &ck));
2396   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2397   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2398   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2399   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2400   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2401   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2402   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2403   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2404   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2405   PetscFunctionReturn(PETSC_SUCCESS);
2406 }
2407 
2408 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2409 {
2410   Mat_MatTransMultDense *abt;
2411 
2412   PetscFunctionBegin;
2413   MatCheckProduct(C, 3);
2414   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2415   abt = (Mat_MatTransMultDense *)C->product->data;
2416   switch (abt->alg) {
2417   case 1:
2418     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2419     break;
2420   default:
2421     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2422     break;
2423   }
2424   PetscFunctionReturn(PETSC_SUCCESS);
2425 }
2426 
2427 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2428 {
2429   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2430 
2431   PetscFunctionBegin;
2432   PetscCall(MatDestroy(&ab->Ce));
2433   PetscCall(MatDestroy(&ab->Ae));
2434   PetscCall(MatDestroy(&ab->Be));
2435   PetscCall(PetscFree(ab));
2436   PetscFunctionReturn(PETSC_SUCCESS);
2437 }
2438 
/*
  Numeric phase of C = A * B for MPI dense matrices.

  If the symbolic phase created Elemental work matrices (ab->Ae/ab->Ce), the
  product is delegated to Elemental. Otherwise the "petsc" algorithm is used:
  the rows of B are replicated on every rank into ab->Be (via PetscSF
  broadcast, with a GPU-aware single-datatype fast path when rank 0 owns all
  rows of B), and the product is completed with a local sequential dense
  multiply.
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;
  Mat_MPIDense     *b   = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
    /* Elemental path: both operands are converted (reusing the work matrices) and multiplied there */
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    MPI_Comm           comm;
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;
    const PetscInt    *ranges;
    PetscMPIInt        size;

    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    comm = PetscObjectComm((PetscObject)B);
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      PetscInt           b_lda, be_lda;
      Mat                b_local  = b->A;
      Mat                b_alloc  = NULL;
      Mat                be_local = ab->Be;
      Mat                be_alloc = NULL;
      PetscMemType       b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype       vector_type;
      PetscScalar       *be_array = NULL;
      PetscMPIInt        rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      /* the single-datatype broadcast assumes contiguous storage (lda == number of rows);
         stage through temporaries when that does not hold */
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      /* only rank 0 owns data to send, so only its source matrix matters */
      if (rank == 0) {
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      /* treat each matrix row (B->cmap->N scalars) as one MPI datatype element */
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      /* memtype-aware SF broadcast allows GPU-aware MPI to avoid host staging */
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      /* copy back from the temporary contiguous matrix if one was needed */
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      /* general case: broadcast B column by column through the SF */
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    /* local rows of C = local rows of A times the replicated B */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2531 
/*
  Symbolic phase of C = A * B for MPI dense matrices: validates the common
  dimension layout, sets up C, and allocates the work matrices for the chosen
  algorithm — Elemental matrices Ae/Be/Ce for alg "elemental", or a replicated
  sequential copy Be of B for the default "petsc" algorithm.
*/
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat_Product      *product = C->product;
  PetscInt          alg;
  Mat_MatMultDense *ab;
  PetscBool         flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  /* alg 0 = "petsc", alg 1 = "elemental" (any non-"petsc" name) */
  PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
  alg = flg ? 0 : 1;

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));

  switch (alg) {
  case 1: /* alg: "elemental" */
#if PetscDefined(HAVE_ELEMENTAL)
    /* create elemental matrices Ae and Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
    PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Ae));
    PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
    PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
    PetscCall(MatSetType(ab->Be, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Be));
    PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

    /* compute symbolic Ce = Ae*Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
    PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
    break;
  default: /* alg: "petsc" */
    /* Be holds all of B replicated on every rank; filled in the numeric phase */
    ab->Ae = NULL;
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
    ab->Ce = NULL;
    break;
  }

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2592 
/*
  Configure a MATPRODUCT_AB product C = A*B: choose between the "petsc"
  algorithm and, when PETSc was built with Elemental, the "elemental"
  algorithm (overridable with -mat_product_algorithm), then install the
  symbolic callbacks.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is PETSc */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2616 
2617 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2618 {
2619   Mat_Product *product = C->product;
2620   Mat          A = product->A, B = product->B;
2621 
2622   PetscFunctionBegin;
2623   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2624              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2625   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2626   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2627   PetscFunctionReturn(PETSC_SUCCESS);
2628 }
2629 
/*
  Configure a MATPRODUCT_ABt product C = A*B^T: choose between the
  "allgatherv" (default) and "cyclic" algorithms — via the legacy
  -matmattransmult_mpidense_mpidense_via option when called through the old
  MatMatTransposeMult() API, or -mat_product_algorithm otherwise — then
  install the symbolic callbacks.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2659 
2660 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2661 {
2662   Mat_Product *product = C->product;
2663 
2664   PetscFunctionBegin;
2665   switch (product->type) {
2666   case MATPRODUCT_AB:
2667     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2668     break;
2669   case MATPRODUCT_AtB:
2670     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2671     break;
2672   case MATPRODUCT_ABt:
2673     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2674     break;
2675   default:
2676     break;
2677   }
2678   PetscFunctionReturn(PETSC_SUCCESS);
2679 }
2680