xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6a8d16b2b41d0cef4718ae3fb870d9eabadeef99)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4    Portions of this code are under:
5    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
6 */
7 
8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
9 #include <../src/mat/impls/aij/mpi/mpiaij.h>
10 #include <petscblaslapack.h>
11 
12 /*@
13       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the sequential or MPI `MATDENSE` matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscValidPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/* Return the diagonal block (rows and columns owned by this process) as a sequential dense
   matrix that shares storage with the local block. The result is cached on A as the
   composed object "DiagonalBlock", so repeated calls return the same matrix. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the local array at column rstart; NOTE(review): this offset assumes the
       local leading dimension equals m — confirm for matrices with a user-set LDA */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing takes a reference to B; drop ours so the cache owns it */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/* Insert or add a logically dense block of values: locally owned rows go straight into
   the sequential block; off-process rows are stashed for communication during assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: input row i occupies v[i*n .. i*n + n - 1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: input entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it in the stash and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
164 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Set the leading dimension of the local storage; lazily creates the local sequential
   block first if preallocation has not happened yet. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block stores the local rows but ALL global columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    /* match the sequential type (host/CUDA/HIP) to the parallel type */
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
/* Extract the submatrix A[isrow, iscol] into a new (or reused) parallel dense matrix.
   The column index set is gathered onto every process (each process stores every column
   locally), while rows keep A's partitioning: no parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full column selection since it stores all columns locally */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at source column icol[i]; the selected rows (shifted to local
       indices) are copied in order into the destination, which is written
       contiguously via bv++ */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
/* Finish assembly: drain the stash of entries that arrived from other processes,
   inserting them (now locally owned) via MatSetValues_MPIDense, then assemble the
   local sequential block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j; /* continue with the next run of identical row indices */
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436 {
437   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438   PetscInt      i, len, *lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443   /* fix right hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     PetscCall(VecGetArrayRead(x, &xx));
449     PetscCall(VecGetArrayWrite(b, &bb));
450     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451     PetscCall(VecRestoreArrayRead(x, &xx));
452     PetscCall(VecRestoreArrayWrite(b, &bb));
453   }
454   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455   if (diag != 0.0) {
456     Vec d;
457 
458     PetscCall(MatCreateVecs(A, NULL, &d));
459     PetscCall(VecSet(d, diag));
460     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461     PetscCall(VecDestroy(&d));
462   }
463   PetscCall(PetscFree(lrows));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
/* y = A*x: broadcast the distributed x into the local work vector, then do the
   local dense multiply (each process stores all columns of its rows). */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication object (SF) and work vector on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* gather the pieces of x from all processes into lvec */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* local sequential multiply produces this process's rows of y */
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
490 
/* z = y + A*x: same communication pattern as MatMult_MPIDense, with the local
   multiply-add doing the accumulation. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication object (SF) and work vector on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* gather the pieces of x from all processes into lvec */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* local sequential multiply-add produces this process's rows of z */
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* y = A^T x: apply the local transpose multiply into the work vector, then
   sum the partial contributions from all processes into y. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication object (SF) and work vector on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* the reduction below accumulates into y */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  /* sum each process's partial result into the distributed y */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
529 
/* z = y + A^T x: seed z with y, apply the local transpose multiply into the
   work vector, then sum the partial contributions from all processes into z. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the communication object (SF) and work vector on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz)); /* the reduction below accumulates on top of y */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  /* sum each process's partial result into the distributed z */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551 {
552   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
554   PetscScalar       *x;
555   const PetscScalar *av;
556 
557   PetscFunctionBegin;
558   PetscCall(VecGetArray(v, &x));
559   PetscCall(VecGetSize(v, &ng));
560   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
561   PetscCall(VecGetLocalSize(v, &nl));
562   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
563   radd = A->rmap->rstart * m;
564   PetscCall(MatDenseGetArrayRead(a->A, &av));
565   PetscCall(MatDenseGetLDA(a->A, &lda));
566   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568   PetscCall(PetscArrayzero(x + i, nl - i));
569   PetscCall(VecRestoreArray(v, &x));
570   PetscFunctionReturn(PETSC_SUCCESS);
571 }
572 
/* Destroy the parallel dense matrix: free the local block, communication objects and
   cached column vector/matrix, then remove every composed method and object. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying while a column vector/submatrix view is checked out is a caller error */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove all dense-specific methods composed on the object at creation time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA-specific conversions, products and array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP-specific conversions, products and array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
669 
670 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
671 
672 #include <petscdraw.h>
673 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
674 {
675   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
676   PetscMPIInt       rank;
677   PetscViewerType   vtype;
678   PetscBool         iascii, isdraw;
679   PetscViewer       sviewer;
680   PetscViewerFormat format;
681 
682   PetscFunctionBegin;
683   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
684   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
685   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
686   if (iascii) {
687     PetscCall(PetscViewerGetType(viewer, &vtype));
688     PetscCall(PetscViewerGetFormat(viewer, &format));
689     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690       MatInfo info;
691       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
692       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
693       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
694                                                    (PetscInt)info.memory));
695       PetscCall(PetscViewerFlush(viewer));
696       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
697       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
698       PetscFunctionReturn(PETSC_SUCCESS);
699     } else if (format == PETSC_VIEWER_ASCII_INFO) {
700       PetscFunctionReturn(PETSC_SUCCESS);
701     }
702   } else if (isdraw) {
703     PetscDraw draw;
704     PetscBool isnull;
705 
706     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
707     PetscCall(PetscDrawIsNull(draw, &isnull));
708     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
709   }
710 
711   {
712     /* assemble the entire matrix onto first processor. */
713     Mat          A;
714     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
715     PetscInt    *cols;
716     PetscScalar *vals;
717 
718     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
719     if (rank == 0) {
720       PetscCall(MatSetSizes(A, M, N, M, N));
721     } else {
722       PetscCall(MatSetSizes(A, 0, 0, M, N));
723     }
724     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
725     PetscCall(MatSetType(A, MATMPIDENSE));
726     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
727 
728     /* Copy the matrix ... This isn't the most efficient means,
729        but it's quick for now */
730     A->insertmode = INSERT_VALUES;
731 
732     row = mat->rmap->rstart;
733     m   = mdn->A->rmap->n;
734     for (i = 0; i < m; i++) {
735       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
736       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
737       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
738       row++;
739     }
740 
741     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
742     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
743     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
744     if (rank == 0) {
745       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
746       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
747     }
748     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
749     PetscCall(PetscViewerFlush(viewer));
750     PetscCall(MatDestroy(&A));
751   }
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
755 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
756 {
757   PetscBool iascii, isbinary, isdraw, issocket;
758 
759   PetscFunctionBegin;
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
761   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
762   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
763   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
764 
765   if (iascii || issocket || isdraw) {
766     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
767   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
768   PetscFunctionReturn(PETSC_SUCCESS);
769 }
770 
771 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
772 {
773   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
774   Mat            mdn = mat->A;
775   PetscLogDouble isend[5], irecv[5];
776 
777   PetscFunctionBegin;
778   info->block_size = 1.0;
779 
780   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
781 
782   isend[0] = info->nz_used;
783   isend[1] = info->nz_allocated;
784   isend[2] = info->nz_unneeded;
785   isend[3] = info->memory;
786   isend[4] = info->mallocs;
787   if (flag == MAT_LOCAL) {
788     info->nz_used      = isend[0];
789     info->nz_allocated = isend[1];
790     info->nz_unneeded  = isend[2];
791     info->memory       = isend[3];
792     info->mallocs      = isend[4];
793   } else if (flag == MAT_GLOBAL_MAX) {
794     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
795 
796     info->nz_used      = irecv[0];
797     info->nz_allocated = irecv[1];
798     info->nz_unneeded  = irecv[2];
799     info->memory       = irecv[3];
800     info->mallocs      = irecv[4];
801   } else if (flag == MAT_GLOBAL_SUM) {
802     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
803 
804     info->nz_used      = irecv[0];
805     info->nz_allocated = irecv[1];
806     info->nz_unneeded  = irecv[2];
807     info->memory       = irecv[3];
808     info->mallocs      = irecv[4];
809   }
810   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
811   info->fill_ratio_needed = 0;
812   info->factor_mallocs    = 0;
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
816 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
817 {
818   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
819 
820   PetscFunctionBegin;
821   switch (op) {
822   case MAT_NEW_NONZERO_LOCATIONS:
823   case MAT_NEW_NONZERO_LOCATION_ERR:
824   case MAT_NEW_NONZERO_ALLOCATION_ERR:
825     MatCheckPreallocated(A, 1);
826     PetscCall(MatSetOption(a->A, op, flg));
827     break;
828   case MAT_ROW_ORIENTED:
829     MatCheckPreallocated(A, 1);
830     a->roworiented = flg;
831     PetscCall(MatSetOption(a->A, op, flg));
832     break;
833   case MAT_FORCE_DIAGONAL_ENTRIES:
834   case MAT_KEEP_NONZERO_PATTERN:
835   case MAT_USE_HASH_TABLE:
836   case MAT_SORTED_FULL:
837     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
838     break;
839   case MAT_IGNORE_OFF_PROC_ENTRIES:
840     a->donotstash = flg;
841     break;
842   case MAT_SYMMETRIC:
843   case MAT_STRUCTURALLY_SYMMETRIC:
844   case MAT_HERMITIAN:
845   case MAT_SYMMETRY_ETERNAL:
846   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
847   case MAT_SPD:
848   case MAT_IGNORE_LOWER_TRIANGULAR:
849   case MAT_IGNORE_ZERO_ENTRIES:
850   case MAT_SPD_ETERNAL:
851     /* if the diagonal matrix is square it inherits some of the properties above */
852     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
853     break;
854   default:
855     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
856   }
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
/* Scale A in place: A = diag(ll) * A * diag(rr).

   ll shares A's row layout, so the left scaling touches only local rows.
   rr shares A's column layout, so its entries are first communicated into
   this process's work vector lvec through the Mvctx star forest before
   each column of the local block is scaled. Either vector may be NULL to
   skip that side. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* row i, column 0; consecutive columns are lda apart (column-major storage) */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* bring rr's entry for every column of the local block into lvec */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* start of column i */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
908 
/* Compute a norm of a parallel dense matrix.

   Supported: NORM_FROBENIUS, NORM_1 (max column absolute sum), and
   NORM_INFINITY (max row absolute sum); NORM_2 is rejected. On a single
   process everything is delegated to the sequential block.

   NOTE(review): the flat array loops for NORM_FROBENIUS and NORM_1 walk
   cmap->n * rmap->n consecutive entries, which assumes the local array is
   contiguous (lda == local row count) -- TODO confirm for matrices with a
   larger leading dimension. */
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 locally, reduce globally, then take the square root */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* per-column absolute sums, summed across processes, then maximized */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* row sums are local to each process, so only a max-reduce is needed */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
958 
/* Form the transpose of a parallel dense matrix.

   Each process owns rows [rstart, rstart+m) of A; since the local block
   stores all N global columns, column j of the local block contributes to
   global row j of the transpose. Values are sent with MatSetValues using
   global indices and assembled. For MAT_INPLACE_MATRIX the result's
   contents are merged back into A's header. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* the transpose swaps row/column layouts: local sizes (n, m), global (N, M) */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of A, which become column indices of B */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of the local block (length m, starting at v) becomes row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda; /* advance to the next column (column-major with leading dimension lda) */
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: move B's contents into A and destroy the temporary header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
997 
998 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
999 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1000 
1001 PetscErrorCode MatSetUp_MPIDense(Mat A)
1002 {
1003   PetscFunctionBegin;
1004   PetscCall(PetscLayoutSetUp(A->rmap));
1005   PetscCall(PetscLayoutSetUp(A->cmap));
1006   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1007   PetscFunctionReturn(PETSC_SUCCESS);
1008 }
1009 
1010 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1011 {
1012   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1013 
1014   PetscFunctionBegin;
1015   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1016   PetscFunctionReturn(PETSC_SUCCESS);
1017 }
1018 
1019 PetscErrorCode MatConjugate_MPIDense(Mat mat)
1020 {
1021   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1022 
1023   PetscFunctionBegin;
1024   PetscCall(MatConjugate(a->A));
1025   PetscFunctionReturn(PETSC_SUCCESS);
1026 }
1027 
1028 PetscErrorCode MatRealPart_MPIDense(Mat A)
1029 {
1030   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1031 
1032   PetscFunctionBegin;
1033   PetscCall(MatRealPart(a->A));
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1038 {
1039   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1040 
1041   PetscFunctionBegin;
1042   PetscCall(MatImaginaryPart(a->A));
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1047 {
1048   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1049 
1050   PetscFunctionBegin;
1051   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1052   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1053   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1058 
1059 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1060 {
1061   PetscInt      i, m, n;
1062   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1063   PetscReal    *work;
1064 
1065   PetscFunctionBegin;
1066   PetscCall(MatGetSize(A, &m, &n));
1067   PetscCall(PetscMalloc1(n, &work));
1068   if (type == REDUCTION_MEAN_REALPART) {
1069     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1070   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1071     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1072   } else {
1073     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1074   }
1075   if (type == NORM_2) {
1076     for (i = 0; i < n; i++) work[i] *= work[i];
1077   }
1078   if (type == NORM_INFINITY) {
1079     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1080   } else {
1081     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1082   }
1083   PetscCall(PetscFree(work));
1084   if (type == NORM_2) {
1085     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1086   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1087     for (i = 0; i < n; i++) reductions[i] /= m;
1088   }
1089   PetscFunctionReturn(PETSC_SUCCESS);
1090 }
1091 
1092 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1093 {
1094   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1095 
1096   PetscFunctionBegin;
1097   PetscCall(MatSetRandom(d->A, rctx));
1098 #if defined(PETSC_HAVE_DEVICE)
1099   x->offloadmask = d->A->offloadmask;
1100 #endif
1101   PetscFunctionReturn(PETSC_SUCCESS);
1102 }
1103 
/* A dense matrix stores every entry explicitly, so a diagonal entry can
   never be missing; d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1110 
1111 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1112 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1113 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1114 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1115 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1116 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1117 
/* Virtual function table for MATMPIDENSE. Slot positions follow struct
   _MatOps; the /*NN*/ comments mark every fifth slot index. NULL entries
   either fall back to generic Mat implementations or are unsupported for
   this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1270 
/* Allocate (or adopt user-supplied) storage for the local dense block.

   data - optional user array for the local block; pass NULL to have PETSc
          allocate it.

   The inner sequential matrix a->A is sized (local rows) x (global columns),
   and its type mirrors the outer matrix (host, CUDA, or HIP dense). The
   matrix is marked preallocated and assembled on return. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* first call: create the inner sequential matrix holding all global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* pick the sequential type matching the outer (possibly device) type */
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1303 
1304 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1305 {
1306   Mat B, C;
1307 
1308   PetscFunctionBegin;
1309   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1310   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1311   PetscCall(MatDestroy(&C));
1312   if (reuse == MAT_REUSE_MATRIX) {
1313     C = *newmat;
1314   } else C = NULL;
1315   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1316   PetscCall(MatDestroy(&B));
1317   if (reuse == MAT_INPLACE_MATRIX) {
1318     PetscCall(MatHeaderReplace(A, &C));
1319   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1320   PetscFunctionReturn(PETSC_SUCCESS);
1321 }
1322 
1323 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1324 {
1325   Mat B, C;
1326 
1327   PetscFunctionBegin;
1328   PetscCall(MatDenseGetLocalMatrix(A, &C));
1329   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1330   if (reuse == MAT_REUSE_MATRIX) {
1331     C = *newmat;
1332   } else C = NULL;
1333   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1334   PetscCall(MatDestroy(&B));
1335   if (reuse == MAT_INPLACE_MATRIX) {
1336     PetscCall(MatHeaderReplace(A, &C));
1337   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1338   PetscFunctionReturn(PETSC_SUCCESS);
1339 }
1340 
1341 #if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format.

   Each process contributes its local rows (global indices rstart..rstart+m-1)
   across all N global columns; the Elemental interface redistributes them.
   MAT_REUSE_MATRIX zeroes and refills the existing target, which is required
   because the values are inserted with ADD_VALUES (see below). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the local dense array is column-major, so insert column-oriented */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global row indices of the local rows, and all global column indices */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1379 #endif
1380 
1381 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1382 {
1383   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1384 
1385   PetscFunctionBegin;
1386   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1387   PetscFunctionReturn(PETSC_SUCCESS);
1388 }
1389 
1390 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1391 {
1392   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1393 
1394   PetscFunctionBegin;
1395   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1400 {
1401   Mat_MPIDense *mat;
1402   PetscInt      m, nloc, N;
1403 
1404   PetscFunctionBegin;
1405   PetscCall(MatGetSize(inmat, &m, &N));
1406   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1407   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1408     PetscInt sum;
1409 
1410     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1411     /* Check sum(n) = N */
1412     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1413     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1414 
1415     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1416     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1417   }
1418 
1419   /* numeric phase */
1420   mat = (Mat_MPIDense *)(*outmat)->data;
1421   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1422   PetscFunctionReturn(PETSC_SUCCESS);
1423 }
1424 
/* Expose column col of the local dense block as a Vec that aliases the matrix storage
   (read-write access). Only one column vector or submatrix may be checked out at a time;
   the caller must return it with MatDenseRestoreColumnVec(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the column Vec is created lazily and cached on the matrix */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored 1-based so 0 means "not checked out" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1441 
1442 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1443 {
1444   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1445 
1446   PetscFunctionBegin;
1447   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1448   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1449   a->vecinuse = 0;
1450   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1451   PetscCall(VecResetArray(a->cvec));
1452   if (v) *v = NULL;
1453   PetscFunctionReturn(PETSC_SUCCESS);
1454 }
1455 
/* Expose column col of the local dense block as a read-only Vec aliasing the matrix
   storage. Same checkout protocol as MatDenseGetColumnVec_MPIDense(), but the array is
   fetched read-only and the Vec is read-locked until restored. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the column Vec is created lazily and cached on the matrix */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored 1-based so 0 means "not checked out" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  /* forbid writes through the returned Vec */
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1473 
/* Return a column vector obtained with MatDenseGetColumnVecRead(): restore the read-only
   array, drop the read lock, and detach the array from the cached Vec. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  /* the lock pushed in the Get call must be popped before resetting the placed array */
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1488 
/* Expose column col of the local dense block as a write-only Vec aliasing the matrix
   storage. Same checkout protocol as MatDenseGetColumnVec_MPIDense(), but the array is
   fetched write-only (previous contents need not be valid). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the column Vec is created lazily and cached on the matrix */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* stored 1-based so 0 means "not checked out" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1505 
/* Return a column vector obtained with MatDenseGetColumnVecWrite(): restore the
   write-only array and detach it from the cached Vec. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1519 
/* Expose the dense submatrix A[rbegin:rend, cbegin:cend] as a MATMPIDENSE matrix that
   aliases A's storage. The wrapper matrix a->cmat is cached and reused across calls;
   its row/column layouts are rebuilt only when the requested sizes differ from the
   previous request. Must be paired with MatDenseRestoreSubMatrix(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range to this rank's owned rows (local indices) */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first checkout: build the cached wrapper and its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse path: rebuild layouts only when the requested shape changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* same global row count but not a full range: all ranks must agree the local split matches */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the wrapper's local block aliases the corresponding submatrix of A's local block */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored 1-based so 0 means "not checked out" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1577 
/* Return the cached submatrix obtained with MatDenseGetSubMatrix() and release its hold
   on the local dense block's storage. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the caller may have modified the view on the device; pick up the local block's offload state */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1596 
1597 /*MC
1598    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1599 
1600    Options Database Key:
1601 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1602 
1603   Level: beginner
1604 
1605 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1606 M*/
/* Constructor for MATMPIDENSE: allocates the implementation struct, installs the
   function table, and registers all type-specific methods and conversions with the
   PetscObject function-composition mechanism. Called by MatSetType(). */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* copy the whole operations table by struct assignment */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column-vector/submatrix checkout methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions; GPU and external-package variants are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* MatProduct dispatch for AIJ/dense combinations (plus GPU sparse variants) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1675 
1676 /*MC
1677    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1678 
1679    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1680    and `MATMPIDENSE` otherwise.
1681 
1682    Options Database Key:
1683 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1684 
1685   Level: beginner
1686 
1687 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1688 M*/
1689 
1690 /*@C
1691    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1692 
1693    Collective
1694 
1695    Input Parameters:
1696 .  B - the matrix
1697 -  data - optional location of matrix data.  Set to `NULL` for PETSc
1698    to control all matrix memory allocation.
1699 
1700    Level: intermediate
1701 
1702    Notes:
1703    The dense format is fully compatible with standard Fortran
1704    storage by columns.
1705 
1706    The data input variable is intended primarily for Fortran programmers
1707    who wish to allocate their own matrix memory space.  Most users should
1708    set `data` to `NULL`.
1709 
1710 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1711 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod is a no-op when B is not a MATMPIDENSE, so this is safe on any Mat type */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1719 
1720 /*@
1721    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1722    array provided by the user. This is useful to avoid copying an array
1723    into a matrix
1724 
1725    Not Collective
1726 
1727    Input Parameters:
1728 +  mat - the matrix
1729 -  array - the array in column major order
1730 
1731    Level: developer
1732 
1733    Note:
1734    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1735    freed when the matrix is destroyed.
1736 
1737 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1738           `MatDenseReplaceArray()`
1739 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the type does not implement MatDensePlaceArray_C */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the placed array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1751 
1752 /*@
1753    MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1754 
1755    Not Collective
1756 
1757    Input Parameter:
1758 .  mat - the matrix
1759 
1760    Level: developer
1761 
1762    Note:
1763    You can only call this after a call to `MatDensePlaceArray()`
1764 
1765 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1766 @*/
1767 PetscErrorCode MatDenseResetArray(Mat mat)
1768 {
1769   PetscFunctionBegin;
1770   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1771   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1772   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1773   PetscFunctionReturn(PETSC_SUCCESS);
1774 }
1775 
1776 /*@
1777    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1778    array provided by the user. This is useful to avoid copying an array
1779    into a matrix
1780 
1781    Not Collective
1782 
1783    Input Parameters:
1784 +  mat - the matrix
1785 -  array - the array in column major order
1786 
1787    Level: developer
1788 
1789    Note:
1790    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1791    freed by the user. It will be freed when the matrix is destroyed.
1792 
1793 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1794 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the type does not implement MatDenseReplaceArray_C */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1806 
1807 /*@C
1808    MatCreateDense - Creates a matrix in `MATDENSE` format.
1809 
1810    Collective
1811 
1812    Input Parameters:
1813 +  comm - MPI communicator
1814 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1815 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1816 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1817 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1818 -  data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1819    to control all matrix memory allocation.
1820 
1821    Output Parameter:
1822 .  A - the matrix
1823 
1824    Level: intermediate
1825 
1826    Notes:
1827    The dense format is fully compatible with standard Fortran
1828    storage by columns.
1829 
1830    Although local portions of the matrix are stored in column-major
1831    order, the matrix is partitioned across MPI ranks by row.
1832 
1833    The data input variable is intended primarily for Fortran programmers
1834    who wish to allocate their own matrix memory space.  Most users should
1835    set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1836 
1837    The user MUST specify either the local or global matrix dimensions
1838    (possibly both).
1839 
1840 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1841 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* MATDENSE resolves to seq or mpi depending on comm size; each preallocation call is a
     no-op (PetscTryMethod) for the type that was not selected */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1852 
1853 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1854 {
1855   Mat           mat;
1856   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1857 
1858   PetscFunctionBegin;
1859   *newmat = NULL;
1860   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1861   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1862   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1863   a = (Mat_MPIDense *)mat->data;
1864 
1865   mat->factortype   = A->factortype;
1866   mat->assembled    = PETSC_TRUE;
1867   mat->preallocated = PETSC_TRUE;
1868 
1869   mat->insertmode = NOT_SET_VALUES;
1870   a->donotstash   = oldmat->donotstash;
1871 
1872   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1873   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1874 
1875   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1876 
1877   *newmat = mat;
1878   PetscFunctionReturn(PETSC_SUCCESS);
1879 }
1880 
/* Load a MATMPIDENSE matrix from a viewer; binary and (when available) HDF5 viewers are
   supported, anything else is an error. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1906 
1907 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1908 {
1909   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1910   Mat           a, b;
1911 
1912   PetscFunctionBegin;
1913   a = matA->A;
1914   b = matB->A;
1915   PetscCall(MatEqual(a, b, flag));
1916   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1917   PetscFunctionReturn(PETSC_SUCCESS);
1918 }
1919 
/* Destructor for the A^T*B product data cached on C (see
   MatTransposeMatMultSymbolic_MPIDense_MPIDense): frees the send buffer, the
   reduce-scatter counts, and the intermediate local product matrix. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1930 
/* Destructor for the A*B^T product data cached on C (see
   MatMatTransposeMultSymbolic_MPIDense_MPIDense): frees the communication buffers and
   the allgatherv counts/displacements. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1941 
/* Numeric phase of C = A^T * B for MATMPIDENSE: each rank forms the full local product
   aseq^T * bseq, packs it by destination rank, and an MPI_Reduce_scatter sums the
   contributions and delivers each rank its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers were sized in the symbolic phase and are reused on every numeric call */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its rows of every column,
     packed contiguously in the order MPI_Reduce_scatter expects */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1986 
1987 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1988 {
1989   MPI_Comm               comm;
1990   PetscMPIInt            size;
1991   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1992   Mat_TransMatMultDense *atb;
1993   PetscBool              cisdense = PETSC_FALSE;
1994   PetscInt               i;
1995   const PetscInt        *ranges;
1996 
1997   PetscFunctionBegin;
1998   MatCheckProduct(C, 4);
1999   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2000   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2001   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2002     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2003   }
2004 
2005   /* create matrix product C */
2006   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2007 #if defined(PETSC_HAVE_CUDA)
2008   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2009 #endif
2010 #if defined(PETSC_HAVE_HIP)
2011   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2012 #endif
2013   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2014   PetscCall(MatSetUp(C));
2015 
2016   /* create data structure for reuse C */
2017   PetscCallMPI(MPI_Comm_size(comm, &size));
2018   PetscCall(PetscNew(&atb));
2019   cM = C->rmap->N;
2020   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2021   PetscCall(MatGetOwnershipRanges(C, &ranges));
2022   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2023 
2024   C->product->data    = atb;
2025   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2026   PetscFunctionReturn(PETSC_SUCCESS);
2027 }
2028 
/* Symbolic phase of C = A * B^T for MATMPIDENSE: sets C's sizes/type and allocates the
   communication buffers for the chosen algorithm ("allgatherv" gathers all of B on each
   rank; "cyclic" ring-shifts blocks of B with nonblocking sends/receives). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic algorithm's ring messages cannot collide with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B that will travel around the ring */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] packs the local rows of B; buf[1] receives all of B via MPI_Allgatherv */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    /* A->cmap->N == B->cmap->N here because the local column sizes were checked above */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2080 
/*
  Computes C = A * B^T with a ring pipeline: each rank packs its locally owned
  rows of B into a contiguous buffer, the buffers are cycled around the
  communicator one neighbor at a time, and on every step the current buffer is
  multiplied (gemm "N","T") against the local rows of A to fill the matching
  column block of C.  Nonblocking send/recv of the next block overlaps with the
  GEMM on the current block.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* raw access to the local dense blocks of A, B, C and their leading dimensions */
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n; /* number of locally owned rows of B currently held in sendbuf */
  if (blda == bn) {
    /* B's local block is contiguous (no lda padding): send it in place.
       Const is cast away, but the buffer is only read (MPI_Isend / gemm). */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact the lda-padded columns of B's local block into abt->buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send "down" to rank-1, receive "up" from rank+1 */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the first block processed is our own */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* double-buffer: alternate between the two scratch buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2163 
2164 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2165 {
2166   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2167   Mat_MatTransMultDense *abt;
2168   MPI_Comm               comm;
2169   PetscMPIInt            size;
2170   PetscScalar           *cv, *sendbuf, *recvbuf;
2171   const PetscScalar     *av, *bv;
2172   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2173   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2174   PetscBLASInt           cm, cn, ck, alda, clda;
2175 
2176   PetscFunctionBegin;
2177   MatCheckProduct(C, 3);
2178   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2179   abt = (Mat_MatTransMultDense *)C->product->data;
2180   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2181   PetscCallMPI(MPI_Comm_size(comm, &size));
2182   PetscCall(MatDenseGetArrayRead(a->A, &av));
2183   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2184   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2185   PetscCall(MatDenseGetLDA(a->A, &i));
2186   PetscCall(PetscBLASIntCast(i, &alda));
2187   PetscCall(MatDenseGetLDA(b->A, &blda));
2188   PetscCall(MatDenseGetLDA(c->A, &i));
2189   PetscCall(PetscBLASIntCast(i, &clda));
2190   /* copy transpose of B into buf[0] */
2191   bn      = B->rmap->n;
2192   sendbuf = abt->buf[0];
2193   recvbuf = abt->buf[1];
2194   for (k = 0, j = 0; j < bn; j++) {
2195     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2196   }
2197   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2198   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2199   PetscCall(PetscBLASIntCast(cK, &ck));
2200   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2201   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2202   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2203   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2204   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2205   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2206   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2207   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2208   PetscFunctionReturn(PETSC_SUCCESS);
2209 }
2210 
2211 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2212 {
2213   Mat_MatTransMultDense *abt;
2214 
2215   PetscFunctionBegin;
2216   MatCheckProduct(C, 3);
2217   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2218   abt = (Mat_MatTransMultDense *)C->product->data;
2219   switch (abt->alg) {
2220   case 1:
2221     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2222     break;
2223   default:
2224     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2225     break;
2226   }
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
2230 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2231 {
2232   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2233 
2234   PetscFunctionBegin;
2235   PetscCall(MatDestroy(&ab->Ce));
2236   PetscCall(MatDestroy(&ab->Ae));
2237   PetscCall(MatDestroy(&ab->Be));
2238   PetscCall(PetscFree(ab));
2239   PetscFunctionReturn(PETSC_SUCCESS);
2240 }
2241 
2242 #if defined(PETSC_HAVE_ELEMENTAL)
2243 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2244 {
2245   Mat_MatMultDense *ab;
2246 
2247   PetscFunctionBegin;
2248   MatCheckProduct(C, 3);
2249   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2250   ab = (Mat_MatMultDense *)C->product->data;
2251   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2252   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2253   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2254   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2255   PetscFunctionReturn(PETSC_SUCCESS);
2256 }
2257 
2258 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2259 {
2260   Mat               Ae, Be, Ce;
2261   Mat_MatMultDense *ab;
2262 
2263   PetscFunctionBegin;
2264   MatCheckProduct(C, 4);
2265   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2266   /* check local size of A and B */
2267   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2268     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2269   }
2270 
2271   /* create elemental matrices Ae and Be */
2272   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2273   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2274   PetscCall(MatSetType(Ae, MATELEMENTAL));
2275   PetscCall(MatSetUp(Ae));
2276   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2277 
2278   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2279   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2280   PetscCall(MatSetType(Be, MATELEMENTAL));
2281   PetscCall(MatSetUp(Be));
2282   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2283 
2284   /* compute symbolic Ce = Ae*Be */
2285   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2286   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2287 
2288   /* setup C */
2289   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2290   PetscCall(MatSetType(C, MATDENSE));
2291   PetscCall(MatSetUp(C));
2292 
2293   /* create data structure for reuse Cdense */
2294   PetscCall(PetscNew(&ab));
2295   ab->Ae = Ae;
2296   ab->Be = Be;
2297   ab->Ce = Ce;
2298 
2299   C->product->data       = ab;
2300   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2301   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2302   PetscFunctionReturn(PETSC_SUCCESS);
2303 }
2304 #endif
2305 
2306 #if defined(PETSC_HAVE_ELEMENTAL)
2307 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2308 {
2309   PetscFunctionBegin;
2310   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2311   C->ops->productsymbolic = MatProductSymbolic_AB;
2312   PetscFunctionReturn(PETSC_SUCCESS);
2313 }
2314 #endif
2315 
2316 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2317 {
2318   Mat_Product *product = C->product;
2319   Mat          A = product->A, B = product->B;
2320 
2321   PetscFunctionBegin;
2322   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2323     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2324   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2325   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2326   PetscFunctionReturn(PETSC_SUCCESS);
2327 }
2328 
2329 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2330 {
2331   Mat_Product *product     = C->product;
2332   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2333   PetscInt     alg, nalg = 2;
2334   PetscBool    flg = PETSC_FALSE;
2335 
2336   PetscFunctionBegin;
2337   /* Set default algorithm */
2338   alg = 0; /* default is allgatherv */
2339   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2340   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2341 
2342   /* Get runtime option */
2343   if (product->api_user) {
2344     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2345     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2346     PetscOptionsEnd();
2347   } else {
2348     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2349     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2350     PetscOptionsEnd();
2351   }
2352   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2353 
2354   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2355   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2356   PetscFunctionReturn(PETSC_SUCCESS);
2357 }
2358 
2359 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2360 {
2361   Mat_Product *product = C->product;
2362 
2363   PetscFunctionBegin;
2364   switch (product->type) {
2365 #if defined(PETSC_HAVE_ELEMENTAL)
2366   case MATPRODUCT_AB:
2367     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2368     break;
2369 #endif
2370   case MATPRODUCT_AtB:
2371     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2372     break;
2373   case MATPRODUCT_ABt:
2374     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2375     break;
2376   default:
2377     break;
2378   }
2379   PetscFunctionReturn(PETSC_SUCCESS);
2380 }
2381