xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f709e8d7050dd385ca03a559b2f7996691fc57b4)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4    Portions of this code are under:
5    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
6 */
7 
8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
9 #include <../src/mat/impls/aij/mpi/mpiaij.h>
10 #include <petscblaslapack.h>
11 
12 /*@
13   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14   matrix that represents the operator. For sequential matrices it returns itself.
15 
16   Input Parameter:
17 . A - the sequential or MPI `MATDENSE` matrix
18 
19   Output Parameter:
20 . B - the inner matrix
21 
22   Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscAssertPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/* Return (and cache on A as the composed object "DiagonalBlock") a sequential dense
   matrix that shares storage with the diagonal block of the locally owned rows. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* the diagonal block is only well defined when row and column layouts agree */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  /* reuse a previously created block if one was cached on the matrix */
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the inner block's storage starting at column rstart (no copy);
       NOTE(review): the offset m * rstart assumes the inner LDA equals m — confirm */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* cache B on A; the compose takes a reference, so the local MatDestroy below
       only drops this function's reference */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/* Insert/add values: locally owned rows go straight into the inner sequential
   matrix; off-process rows are stashed for communication during assembly. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-oriented input: row i of v is the contiguous slice v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: v is m x n column-major, entry (i,j) is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: record it in the stash; assembly will communicate it */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Set the leading dimension of the inner sequential matrix, creating that
   matrix first (matching the device type of A) if it does not yet exist. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE; /* default inner type; may be switched to a device type below */

  PetscFunctionBegin;
  if (!a->A) {
    /* inner matrix not created yet; build it so the LDA can be applied */
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds all global columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
/* Extract the submatrix A(isrow, iscol) as a new MPI dense matrix. Supports only
   the same row partitioning as A (see note below); all selected columns are
   gathered on every process. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns since it owns whole rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): the bv++ walk assumes the new local block is stored contiguously
     (LDA == nrows) and that irow contains only locally owned rows — true for the
     matrix preallocated above; not verified for MAT_REUSE_MATRIX. */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
/* Finish assembly: drain the stash messages started in MatAssemblyBegin_MPIDense,
   inserting received off-process values, then assemble the inner sequential matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg is zero once every incoming stash message has been consumed */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* assemble the local sequential block as well */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436 {
437   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438   PetscInt      i, len, *lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443   /* fix right hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     PetscCall(VecGetArrayRead(x, &xx));
449     PetscCall(VecGetArrayWrite(b, &bb));
450     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451     PetscCall(VecRestoreArrayRead(x, &xx));
452     PetscCall(VecRestoreArrayWrite(b, &bb));
453   }
454   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455   if (diag != 0.0) {
456     Vec d;
457 
458     PetscCall(MatCreateVecs(A, NULL, &d));
459     PetscCall(VecSet(d, diag));
460     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461     PetscCall(VecDestroy(&d));
462   }
463   PetscCall(PetscFree(lrows));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475   const PetscScalar *ax;
476   PetscScalar       *ay;
477   PetscMemType       axmtype, aymtype;
478 
479   PetscFunctionBegin;
480   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
491 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494   const PetscScalar *ax;
495   PetscScalar       *ay;
496   PetscMemType       axmtype, aymtype;
497 
498   PetscFunctionBegin;
499   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
511 {
512   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
513   const PetscScalar *ax;
514   PetscScalar       *ay;
515   PetscMemType       axmtype, aymtype;
516 
517   PetscFunctionBegin;
518   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
519   PetscCall(VecSet(yy, 0.0));
520   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
521   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
522   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
523   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
524   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
525   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
526   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
531 {
532   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
533   const PetscScalar *ax;
534   PetscScalar       *ay;
535   PetscMemType       axmtype, aymtype;
536 
537   PetscFunctionBegin;
538   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
539   PetscCall(VecCopy(yy, zz));
540   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
541   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
542   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
543   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
544   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
545   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
546   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551 {
552   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
554   PetscScalar       *x;
555   const PetscScalar *av;
556 
557   PetscFunctionBegin;
558   PetscCall(VecGetArray(v, &x));
559   PetscCall(VecGetSize(v, &ng));
560   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
561   PetscCall(VecGetLocalSize(v, &nl));
562   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
563   radd = A->rmap->rstart * m;
564   PetscCall(MatDenseGetArrayRead(a->A, &av));
565   PetscCall(MatDenseGetLDA(a->A, &lda));
566   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568   PetscCall(PetscArrayzero(x + i, nl - i));
569   PetscCall(VecRestoreArray(v, &x));
570   PetscFunctionReturn(PETSC_SUCCESS);
571 }
572 
/* Destroy the parallel dense matrix: release the inner objects, free the data
   structure, and detach every composed method/conversion hook so the object can
   be retyped or freed cleanly. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying while a column-vector or submatrix view is outstanding is a usage error */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the inner sequential matrix, communication objects, and cached views */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach every composed function; each name must match the compose site exactly */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA-specific hooks installed by the mpidensecuda type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP-specific hooks installed by the mpidensehip type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
667 
668 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
669 
670 #include <petscdraw.h>
671 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
672 {
673   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
674   PetscMPIInt       rank;
675   PetscViewerType   vtype;
676   PetscBool         iascii, isdraw;
677   PetscViewer       sviewer;
678   PetscViewerFormat format;
679 
680   PetscFunctionBegin;
681   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
683   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
684   if (iascii) {
685     PetscCall(PetscViewerGetType(viewer, &vtype));
686     PetscCall(PetscViewerGetFormat(viewer, &format));
687     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
688       MatInfo info;
689       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
690       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
691       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
692                                                    (PetscInt)info.memory));
693       PetscCall(PetscViewerFlush(viewer));
694       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
695       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
696       PetscFunctionReturn(PETSC_SUCCESS);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       PetscFunctionReturn(PETSC_SUCCESS);
699     }
700   } else if (isdraw) {
701     PetscDraw draw;
702     PetscBool isnull;
703 
704     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
705     PetscCall(PetscDrawIsNull(draw, &isnull));
706     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
707   }
708 
709   {
710     /* assemble the entire matrix onto first processor. */
711     Mat          A;
712     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
713     PetscInt    *cols;
714     PetscScalar *vals;
715 
716     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
717     if (rank == 0) {
718       PetscCall(MatSetSizes(A, M, N, M, N));
719     } else {
720       PetscCall(MatSetSizes(A, 0, 0, M, N));
721     }
722     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
723     PetscCall(MatSetType(A, MATMPIDENSE));
724     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
725 
726     /* Copy the matrix ... This isn't the most efficient means,
727        but it's quick for now */
728     A->insertmode = INSERT_VALUES;
729 
730     row = mat->rmap->rstart;
731     m   = mdn->A->rmap->n;
732     for (i = 0; i < m; i++) {
733       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
734       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
735       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
736       row++;
737     }
738 
739     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
740     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
741     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
742     if (rank == 0) {
743       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
744       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
745     }
746     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
747     PetscCall(PetscViewerFlush(viewer));
748     PetscCall(MatDestroy(&A));
749   }
750   PetscFunctionReturn(PETSC_SUCCESS);
751 }
752 
753 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
754 {
755   PetscBool iascii, isbinary, isdraw, issocket;
756 
757   PetscFunctionBegin;
758   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
761   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
762 
763   if (iascii || issocket || isdraw) {
764     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
765   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
766   PetscFunctionReturn(PETSC_SUCCESS);
767 }
768 
769 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
770 {
771   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
772   Mat            mdn = mat->A;
773   PetscLogDouble isend[5], irecv[5];
774 
775   PetscFunctionBegin;
776   info->block_size = 1.0;
777 
778   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
779 
780   isend[0] = info->nz_used;
781   isend[1] = info->nz_allocated;
782   isend[2] = info->nz_unneeded;
783   isend[3] = info->memory;
784   isend[4] = info->mallocs;
785   if (flag == MAT_LOCAL) {
786     info->nz_used      = isend[0];
787     info->nz_allocated = isend[1];
788     info->nz_unneeded  = isend[2];
789     info->memory       = isend[3];
790     info->mallocs      = isend[4];
791   } else if (flag == MAT_GLOBAL_MAX) {
792     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
793 
794     info->nz_used      = irecv[0];
795     info->nz_allocated = irecv[1];
796     info->nz_unneeded  = irecv[2];
797     info->memory       = irecv[3];
798     info->mallocs      = irecv[4];
799   } else if (flag == MAT_GLOBAL_SUM) {
800     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
801 
802     info->nz_used      = irecv[0];
803     info->nz_allocated = irecv[1];
804     info->nz_unneeded  = irecv[2];
805     info->memory       = irecv[3];
806     info->mallocs      = irecv[4];
807   }
808   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
809   info->fill_ratio_needed = 0;
810   info->factor_mallocs    = 0;
811   PetscFunctionReturn(PETSC_SUCCESS);
812 }
813 
/* Applies a MatOption to a MATMPIDENSE matrix. Options that concern only
   local storage are forwarded to the inner sequential block; options with
   no meaning for dense storage are logged and ignored. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* Forwarded to the local sequential block (requires preallocated storage) */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* Track orientation on the parallel object as well, for MatSetValues */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* Irrelevant for dense storage: every location always exists */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* When set, values destined for other ranks are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
857 
/* Computes A = diag(ll) * A * diag(rr) in place.
   ll is distributed like A's rows, so left scaling is purely local.
   rr is distributed like A's columns, so its entries are first broadcast
   into the local work vector lvec via the Mvctx star forest. Either vector
   may be NULL to skip that side. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* Scale row i: walk across the row with stride lda (column-major storage) */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* Lazily build the communication pattern needed to gather rr locally */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* Scale column i: contiguous run of m entries starting at column offset i*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
906 
/* Computes the Frobenius, 1-, or infinity-norm of a MATMPIDENSE matrix.
   Local contributions are computed on each rank and combined with an
   all-reduce; NORM_2 (the spectral norm) is not supported. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Uniprocessor: the local block is the whole matrix */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): this walk treats the local array as fully contiguous,
         i.e. it assumes the leading dimension equals the local row count;
         confirm the inner matrix can never carry a padded LDA here */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* Per-column absolute sums, reduced globally, then the max is taken.
         NOTE(review): same contiguity assumption as the Frobenius branch. */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* Rows are not split across ranks, so the local result reduces with MAX */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
956 
/* Builds the transpose of a MATMPIDENSE matrix by inserting each local
   column of A as a row of B through the stash machinery (off-process
   entries travel during assembly). For MAT_INPLACE_MATRIX the freshly
   built matrix replaces A via MatHeaderMerge. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* Row/column local sizes swap along with the global sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices owned by this rank; they become
     the column indices of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* Column j of A (one contiguous run of length m, stride lda between
     columns) becomes global row j of B */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* In-place: B's guts replace A's while A's header object survives */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
995 
996 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
997 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
998 
999 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1000 {
1001   PetscFunctionBegin;
1002   PetscCall(PetscLayoutSetUp(A->rmap));
1003   PetscCall(PetscLayoutSetUp(A->cmap));
1004   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1009 {
1010   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1011 
1012   PetscFunctionBegin;
1013   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1014   PetscFunctionReturn(PETSC_SUCCESS);
1015 }
1016 
1017 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1018 {
1019   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1020 
1021   PetscFunctionBegin;
1022   PetscCall(MatConjugate(a->A));
1023   PetscFunctionReturn(PETSC_SUCCESS);
1024 }
1025 
1026 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1027 {
1028   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1029 
1030   PetscFunctionBegin;
1031   PetscCall(MatRealPart(a->A));
1032   PetscFunctionReturn(PETSC_SUCCESS);
1033 }
1034 
1035 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1036 {
1037   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1038 
1039   PetscFunctionBegin;
1040   PetscCall(MatImaginaryPart(a->A));
1041   PetscFunctionReturn(PETSC_SUCCESS);
1042 }
1043 
1044 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1045 {
1046   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1047 
1048   PetscFunctionBegin;
1049   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1050   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1051   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1056 
/* Computes a per-column reduction (norm, sum, or mean of real/imaginary
   parts) over the whole parallel matrix.

   `type` mixes two enums: small values coincide with NormType
   (NORM_1/NORM_2/NORM_INFINITY) and larger ones are MatReductionType
   members. Means are computed as global sums divided by the global row
   count; NORM_2 is accumulated as squared values so the cross-rank
   reduction can use a plain sum. */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    /* Compute the sum locally; divide by m after the global reduction */
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* Square the local 2-norms so contributions add across ranks */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* Undo the squaring after the global sum */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1089 
1090 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1091 {
1092   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1093 
1094   PetscFunctionBegin;
1095   PetscCall(MatSetRandom(d->A, rctx));
1096 #if defined(PETSC_HAVE_DEVICE)
1097   x->offloadmask = d->A->offloadmask;
1098 #endif
1099   PetscFunctionReturn(PETSC_SUCCESS);
1100 }
1101 
/* A dense matrix stores every entry explicitly, so no diagonal entry can
   be missing; `d` is left untouched because `*missing` is always false. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1108 
1109 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1110 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1111 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1112 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1113 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1114 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1115 
/* Method table for MATMPIDENSE. The bracketed comments give the slot index
   in struct _MatOps; a NULL slot means the operation is not implemented by
   this matrix type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1268 
/* Allocates (or adopts, when `data` is non-NULL) the local storage of a
   MATMPIDENSE matrix. The inner sequential matrix holds all local rows and
   ALL global columns; its concrete type follows the parallel type so CUDA
   and HIP variants get matching sequential device matrices. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* Local block: mat->rmap->n rows by the FULL global column count */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* Mirror the inner matrix's host/device residency on the parallel object */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1301 
/* Converts a MATMPIAIJ matrix to MATMPIDENSE: the local AIJ rows are
   merged into one sequential AIJ matrix, densified, and the per-rank
   dense pieces are concatenated into a new parallel dense matrix.
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C: temporary sequential AIJ holding this rank's rows (all columns) */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  /* Reuse the caller's matrix when provided, otherwise create a fresh one */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* Replace A's contents with the dense result, keeping A's header */
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1320 
/* Converts a MATMPIDENSE matrix to MATMPIAIJ: the borrowed local dense
   block is sparsified into a sequential AIJ matrix, and the per-rank
   pieces are concatenated into a parallel AIJ matrix.
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C is borrowed from A (not destroyed here), unlike the AIJ->Dense path */
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* Replace A's contents with the AIJ result, keeping A's header */
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1338 
1339 #if defined(PETSC_HAVE_ELEMENTAL)
/* Converts a MATMPIDENSE matrix to MATELEMENTAL by pushing all locally
   owned entries through MatSetValues with ADD_VALUES (the only insert mode
   the PETSc-Elemental interface supports for off-process entries). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* ADD_VALUES below requires starting from zero when reusing */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* Column-oriented insertion matches the dense column-major storage */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* Global indices: this rank owns rows [rstart, rstart+m) and all N columns */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* Contiguous storage (lda == m) allows a single bulk insertion; otherwise
     insert one column at a time, skipping the per-column padding */
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1382 #endif
1383 
1384 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1385 {
1386   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1390   PetscFunctionReturn(PETSC_SUCCESS);
1391 }
1392 
1393 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1394 {
1395   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1396 
1397   PetscFunctionBegin;
1398   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
1402 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1403 {
1404   Mat_MPIDense *mat;
1405   PetscInt      m, nloc, N;
1406 
1407   PetscFunctionBegin;
1408   PetscCall(MatGetSize(inmat, &m, &N));
1409   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1410   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1411     PetscInt sum;
1412 
1413     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1414     /* Check sum(n) = N */
1415     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1416     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1417 
1418     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1419     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1420   }
1421 
1422   /* numeric phase */
1423   mat = (Mat_MPIDense *)(*outmat)->data;
1424   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1425   PetscFunctionReturn(PETSC_SUCCESS);
1426 }
1427 
1428 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1429 {
1430   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1431   PetscInt      lda;
1432 
1433   PetscFunctionBegin;
1434   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1435   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1436   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1437   a->vecinuse = col + 1;
1438   PetscCall(MatDenseGetLDA(a->A, &lda));
1439   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1440   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1441   *v = a->cvec;
1442   PetscFunctionReturn(PETSC_SUCCESS);
1443 }
1444 
1445 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1446 {
1447   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1448 
1449   PetscFunctionBegin;
1450   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1451   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1452   a->vecinuse = 0;
1453   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1454   PetscCall(VecResetArray(a->cvec));
1455   if (v) *v = NULL;
1456   PetscFunctionReturn(PETSC_SUCCESS);
1457 }
1458 
1459 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1460 {
1461   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1462   PetscInt      lda;
1463 
1464   PetscFunctionBegin;
1465   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1466   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1467   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1468   a->vecinuse = col + 1;
1469   PetscCall(MatDenseGetLDA(a->A, &lda));
1470   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1471   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1472   PetscCall(VecLockReadPush(a->cvec));
1473   *v = a->cvec;
1474   PetscFunctionReturn(PETSC_SUCCESS);
1475 }
1476 
1477 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1478 {
1479   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1480 
1481   PetscFunctionBegin;
1482   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1483   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1484   a->vecinuse = 0;
1485   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1486   PetscCall(VecLockReadPop(a->cvec));
1487   PetscCall(VecResetArray(a->cvec));
1488   if (v) *v = NULL;
1489   PetscFunctionReturn(PETSC_SUCCESS);
1490 }
1491 
1492 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1493 {
1494   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1495   PetscInt      lda;
1496 
1497   PetscFunctionBegin;
1498   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1499   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1500   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1501   a->vecinuse = col + 1;
1502   PetscCall(MatDenseGetLDA(a->A, &lda));
1503   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1504   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1505   *v = a->cvec;
1506   PetscFunctionReturn(PETSC_SUCCESS);
1507 }
1508 
1509 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1510 {
1511   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1512 
1513   PetscFunctionBegin;
1514   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1515   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1516   a->vecinuse = 0;
1517   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1518   PetscCall(VecResetArray(a->cvec));
1519   if (v) *v = NULL;
1520   PetscFunctionReturn(PETSC_SUCCESS);
1521 }
1522 
/* Return (in *v) a parallel view of rows [rbegin,rend) and columns [cbegin,cend)
   of A, backed by the same storage; the view is the cached matrix a->cmat and
   must be released with MatDenseRestoreSubMatrix(). Only one column-vector or
   submatrix access may be outstanding at a time. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range to this process's ownership range,
     expressed as local indices into a->A */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first request: create the cached view matrix with matching layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse the cached view; rebuild a layout only if the sizes changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* same global size but not A's full layout: the local splits must also
         agree on every process before the row layout can be reused */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the view's local block is a submatrix view of A's local block */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored 1-based so that 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1580 
1581 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1582 {
1583   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1584   Mat_MPIDense *c;
1585 
1586   PetscFunctionBegin;
1587   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1588   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1589   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1590   a->matinuse = 0;
1591   c           = (Mat_MPIDense *)a->cmat->data;
1592   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1593   if (v) *v = NULL;
1594 #if defined(PETSC_HAVE_DEVICE)
1595   A->offloadmask = a->A->offloadmask;
1596 #endif
1597   PetscFunctionReturn(PETSC_SUCCESS);
1598 }
1599 
1600 /*MC
1601    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1602 
1603    Options Database Key:
1604 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1605 
1606   Level: beginner
1607 
1608 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1609 M*/
/* Constructor for MATMPIDENSE: allocates the per-process data structure,
   installs the operation table, and registers the dense-specific methods and
   conversion/product hooks via function composition. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a)); /* zero-initialized */
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MATMPIDENSE operation table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific accessors: leading dimension, raw array, column vectors, submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions; the device variants are registered only when built in */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product hooks with (device) AIJ matrices */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1678 
1679 /*MC
1680    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1681 
1682    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1683    and `MATMPIDENSE` otherwise.
1684 
1685    Options Database Key:
1686 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1687 
1688   Level: beginner
1689 
1690 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1691 M*/
1692 
/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data.  Set to `NULL` for PETSc
   to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch only if the type registered this method (no-op for other types) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1722 
1723 /*@
1724   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1725   array provided by the user. This is useful to avoid copying an array
1726   into a matrix
1727 
1728   Not Collective
1729 
1730   Input Parameters:
1731 + mat   - the matrix
1732 - array - the array in column major order
1733 
1734   Level: developer
1735 
1736   Note:
1737   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1738   freed when the matrix is destroyed.
1739 
1740 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1741           `MatDenseReplaceArray()`
1742 @*/
1743 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1744 {
1745   PetscFunctionBegin;
1746   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1747   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1748   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1749 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1750   mat->offloadmask = PETSC_OFFLOAD_CPU;
1751 #endif
1752   PetscFunctionReturn(PETSC_SUCCESS);
1753 }
1754 
1755 /*@
1756   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1757 
1758   Not Collective
1759 
1760   Input Parameter:
1761 . mat - the matrix
1762 
1763   Level: developer
1764 
1765   Note:
1766   You can only call this after a call to `MatDensePlaceArray()`
1767 
1768 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1769 @*/
1770 PetscErrorCode MatDenseResetArray(Mat mat)
1771 {
1772   PetscFunctionBegin;
1773   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1774   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1775   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1776   PetscFunctionReturn(PETSC_SUCCESS);
1777 }
1778 
1779 /*@
1780   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1781   array provided by the user. This is useful to avoid copying an array
1782   into a matrix
1783 
1784   Not Collective
1785 
1786   Input Parameters:
1787 + mat   - the matrix
1788 - array - the array in column major order
1789 
1790   Level: developer
1791 
1792   Note:
1793   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1794   freed by the user. It will be freed when the matrix is destroyed.
1795 
1796 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1797 @*/
1798 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1799 {
1800   PetscFunctionBegin;
1801   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1802   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1803   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1804 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1805   mat->offloadmask = PETSC_OFFLOAD_CPU;
1806 #endif
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE)); /* resolves to MATSEQDENSE or MATMPIDENSE depending on comm size */
  /* both preallocation calls are attempted; the one not matching the actual
     matrix type is expected to dispatch to nothing (see PetscTryMethod use) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1855 
1856 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1857 {
1858   Mat           mat;
1859   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1860 
1861   PetscFunctionBegin;
1862   *newmat = NULL;
1863   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1864   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1865   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1866   a = (Mat_MPIDense *)mat->data;
1867 
1868   mat->factortype   = A->factortype;
1869   mat->assembled    = PETSC_TRUE;
1870   mat->preallocated = PETSC_TRUE;
1871 
1872   mat->insertmode = NOT_SET_VALUES;
1873   a->donotstash   = oldmat->donotstash;
1874 
1875   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1876   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1877 
1878   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1879 
1880   *newmat = mat;
1881   PetscFunctionReturn(PETSC_SUCCESS);
1882 }
1883 
/* Load a parallel dense matrix from a viewer; supports binary viewers and,
   when built with HDF5, HDF5 viewers. Any other viewer type is an error. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1909 
1910 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1911 {
1912   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1913   Mat           a, b;
1914 
1915   PetscFunctionBegin;
1916   a = matA->A;
1917   b = matB->A;
1918   PetscCall(MatEqual(a, b, flag));
1919   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1920   PetscFunctionReturn(PETSC_SUCCESS);
1921 }
1922 
1923 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1924 {
1925   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1926 
1927   PetscFunctionBegin;
1928   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1929   PetscCall(MatDestroy(&atb->atb));
1930   PetscCall(PetscFree(atb));
1931   PetscFunctionReturn(PETSC_SUCCESS);
1932 }
1933 
1934 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1935 {
1936   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1937 
1938   PetscFunctionBegin;
1939   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1940   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1941   PetscCall(PetscFree(abt));
1942   PetscFunctionReturn(PETSC_SUCCESS);
1943 }
1944 
/* Numeric phase of C = A^T * B for parallel dense A, B: each rank forms the
   local product aseq^T * bseq, then the full-size partial products are summed
   and distributed by row block via MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers allocated in the symbolic phase */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf */
  /* pack column-major per destination process: for each rank, all cN columns
     of its row slice [ranges[proc], ranges[proc+1]) in contiguous order */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1989 
1990 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1991 {
1992   MPI_Comm               comm;
1993   PetscMPIInt            size;
1994   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1995   Mat_TransMatMultDense *atb;
1996   PetscBool              cisdense = PETSC_FALSE;
1997   PetscInt               i;
1998   const PetscInt        *ranges;
1999 
2000   PetscFunctionBegin;
2001   MatCheckProduct(C, 4);
2002   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2003   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2004   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2005     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2006   }
2007 
2008   /* create matrix product C */
2009   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2010 #if defined(PETSC_HAVE_CUDA)
2011   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2012 #endif
2013 #if defined(PETSC_HAVE_HIP)
2014   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2015 #endif
2016   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2017   PetscCall(MatSetUp(C));
2018 
2019   /* create data structure for reuse C */
2020   PetscCallMPI(MPI_Comm_size(comm, &size));
2021   PetscCall(PetscNew(&atb));
2022   cM = C->rmap->N;
2023   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2024   PetscCall(MatGetOwnershipRanges(C, &ranges));
2025   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2026 
2027   C->product->data    = atb;
2028   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2029   PetscFunctionReturn(PETSC_SUCCESS);
2030 }
2031 
/* Symbolic phase of C = A * B^T for parallel dense A, B: sets up C and
   allocates the communication buffers for the chosen algorithm ("allgatherv"
   gathers all of B; "cyclic" ships B's row blocks around the ring). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else selects the cyclic variant */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic send/recv traffic cannot collide with other messages */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B that will circulate */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local rows of B, buf[1] the fully gathered B;
       counts/displacements follow B's row ownership ranges */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2083 
/* C = A*B^T computed with a ring ("cyclic") exchange: each rank's packed block of B
   travels around the communicator while every rank multiplies the block it currently
   holds against its local rows of A, overlapping communication with computation. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense arrays, cast for BLAS */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B storage is already contiguous; send it in place without packing */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack the bn x cK local block of B contiguously (column by column) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next rank */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the block currently held originated on this rank */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so send and recv never alias */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2166 
2167 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2168 {
2169   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2170   Mat_MatTransMultDense *abt;
2171   MPI_Comm               comm;
2172   PetscMPIInt            size;
2173   PetscScalar           *cv, *sendbuf, *recvbuf;
2174   const PetscScalar     *av, *bv;
2175   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2176   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2177   PetscBLASInt           cm, cn, ck, alda, clda;
2178 
2179   PetscFunctionBegin;
2180   MatCheckProduct(C, 3);
2181   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2182   abt = (Mat_MatTransMultDense *)C->product->data;
2183   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2184   PetscCallMPI(MPI_Comm_size(comm, &size));
2185   PetscCall(MatDenseGetArrayRead(a->A, &av));
2186   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2187   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2188   PetscCall(MatDenseGetLDA(a->A, &i));
2189   PetscCall(PetscBLASIntCast(i, &alda));
2190   PetscCall(MatDenseGetLDA(b->A, &blda));
2191   PetscCall(MatDenseGetLDA(c->A, &i));
2192   PetscCall(PetscBLASIntCast(i, &clda));
2193   /* copy transpose of B into buf[0] */
2194   bn      = B->rmap->n;
2195   sendbuf = abt->buf[0];
2196   recvbuf = abt->buf[1];
2197   for (k = 0, j = 0; j < bn; j++) {
2198     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2199   }
2200   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2201   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2202   PetscCall(PetscBLASIntCast(cK, &ck));
2203   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2204   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2205   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2206   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2207   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2208   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2209   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2210   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2211   PetscFunctionReturn(PETSC_SUCCESS);
2212 }
2213 
2214 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2215 {
2216   Mat_MatTransMultDense *abt;
2217 
2218   PetscFunctionBegin;
2219   MatCheckProduct(C, 3);
2220   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2221   abt = (Mat_MatTransMultDense *)C->product->data;
2222   switch (abt->alg) {
2223   case 1:
2224     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2225     break;
2226   default:
2227     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2228     break;
2229   }
2230   PetscFunctionReturn(PETSC_SUCCESS);
2231 }
2232 
2233 #if defined(PETSC_HAVE_ELEMENTAL)
2234 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2235 {
2236   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2237 
2238   PetscFunctionBegin;
2239   PetscCall(MatDestroy(&ab->Ce));
2240   PetscCall(MatDestroy(&ab->Ae));
2241   PetscCall(MatDestroy(&ab->Be));
2242   PetscCall(PetscFree(ab));
2243   PetscFunctionReturn(PETSC_SUCCESS);
2244 }
2245 
/* Numeric phase of C = A*B for MPIDENSE via Elemental: refreshes the Elemental
   mirrors of A and B created at symbolic time, multiplies in Elemental, and
   copies the result back into the MPIDENSE matrix C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  /* reuse the existing Elemental matrices; only their values are updated */
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  /* copy the Elemental product back into C (C already has the right layout) */
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2260 
2261 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2262 {
2263   Mat               Ae, Be, Ce;
2264   Mat_MatMultDense *ab;
2265 
2266   PetscFunctionBegin;
2267   MatCheckProduct(C, 4);
2268   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2269   /* check local size of A and B */
2270   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2271     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2272   }
2273 
2274   /* create elemental matrices Ae and Be */
2275   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2276   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2277   PetscCall(MatSetType(Ae, MATELEMENTAL));
2278   PetscCall(MatSetUp(Ae));
2279   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2280 
2281   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2282   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2283   PetscCall(MatSetType(Be, MATELEMENTAL));
2284   PetscCall(MatSetUp(Be));
2285   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2286 
2287   /* compute symbolic Ce = Ae*Be */
2288   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2289   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2290 
2291   /* setup C */
2292   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2293   PetscCall(MatSetType(C, MATDENSE));
2294   PetscCall(MatSetUp(C));
2295 
2296   /* create data structure for reuse Cdense */
2297   PetscCall(PetscNew(&ab));
2298   ab->Ae = Ae;
2299   ab->Be = Be;
2300   ab->Ce = Ce;
2301 
2302   C->product->data       = ab;
2303   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2304   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2305   PetscFunctionReturn(PETSC_SUCCESS);
2306 }
2307 #endif
2308 
2309 #if defined(PETSC_HAVE_ELEMENTAL)
2310 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2311 {
2312   PetscFunctionBegin;
2313   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2314   C->ops->productsymbolic = MatProductSymbolic_AB;
2315   PetscFunctionReturn(PETSC_SUCCESS);
2316 }
2317 #endif
2318 
2319 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2320 {
2321   Mat_Product *product = C->product;
2322   Mat          A = product->A, B = product->B;
2323 
2324   PetscFunctionBegin;
2325   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2326     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2327   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2328   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2329   PetscFunctionReturn(PETSC_SUCCESS);
2330 }
2331 
/* Configure the A*B^T product: chooses between the "allgatherv" (default) and
   "cyclic" communication algorithms, consulting runtime options in the namespace
   matching how the product was requested (legacy API vs MatProduct API). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    /* option namespace used by the legacy MatMatTransposeMult() entry point */
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    /* option namespace used by the MatProduct API */
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  /* record the user's choice (flg is set only when the option was given) */
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2361 
2362 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2363 {
2364   Mat_Product *product = C->product;
2365 
2366   PetscFunctionBegin;
2367   switch (product->type) {
2368 #if defined(PETSC_HAVE_ELEMENTAL)
2369   case MATPRODUCT_AB:
2370     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2371     break;
2372 #endif
2373   case MATPRODUCT_AtB:
2374     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2375     break;
2376   case MATPRODUCT_ABt:
2377     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2378     break;
2379   default:
2380     break;
2381   }
2382   PetscFunctionReturn(PETSC_SUCCESS);
2383 }
2384