xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 66af8762ec03dbef0e079729eb2a1734a35ed7ff)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13   matrix that represents the operator. For sequential matrices it returns itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
95 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
96 {
97   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
98   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
99   PetscScalar  *array;
100   MPI_Comm      comm;
101   PetscBool     flg;
102   Mat           B;
103 
104   PetscFunctionBegin;
105   PetscCall(MatHasCongruentLayouts(A, &flg));
106   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
107   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
108   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
109 #if PetscDefined(HAVE_CUDA)
110     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
111     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
112 #elif PetscDefined(HAVE_HIP)
113     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
114     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
115 #endif
116     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
117     PetscCall(MatCreate(comm, &B));
118     PetscCall(MatSetSizes(B, m, m, m, m));
119     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
120     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
121     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
122     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
123     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
124     *a = B;
125     PetscCall(MatDestroy(&B));
126   } else *a = B;
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
131 {
132   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
133   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
134   PetscBool     roworiented = A->roworiented;
135 
136   PetscFunctionBegin;
137   for (i = 0; i < m; i++) {
138     if (idxm[i] < 0) continue;
139     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
140     if (idxm[i] >= rstart && idxm[i] < rend) {
141       row = idxm[i] - rstart;
142       if (roworiented) {
143         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
144       } else {
145         for (j = 0; j < n; j++) {
146           if (idxn[j] < 0) continue;
147           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
148           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
149         }
150       }
151     } else if (!A->donotstash) {
152       mat->assembled = PETSC_FALSE;
153       if (roworiented) {
154         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
155       } else {
156         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
157       }
158     }
159   }
160   PetscFunctionReturn(PETSC_SUCCESS);
161 }
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
194 {
195   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
196   MatType       mtype = MATSEQDENSE;
197 
198   PetscFunctionBegin;
199   if (!a->A) {
200     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
201     PetscCall(PetscLayoutSetUp(A->rmap));
202     PetscCall(PetscLayoutSetUp(A->cmap));
203     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
204     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
205 #if PetscDefined(HAVE_CUDA)
206     PetscBool iscuda;
207     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
208     if (iscuda) mtype = MATSEQDENSECUDA;
209 #elif PetscDefined(HAVE_HIP)
210     PetscBool iship;
211     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
212     if (iship) mtype = MATSEQDENSEHIP;
213 #endif
214     PetscCall(MatSetType(a->A, mtype));
215   }
216   PetscCall(MatDenseSetLDA(a->A, lda));
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
283 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
284 {
285   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
286   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
287   const PetscInt    *irow, *icol;
288   const PetscScalar *v;
289   PetscScalar       *bv;
290   Mat                newmat;
291   IS                 iscol_local;
292   MPI_Comm           comm_is, comm_mat;
293 
294   PetscFunctionBegin;
295   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
296   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
297   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
298 
299   PetscCall(ISAllGather(iscol, &iscol_local));
300   PetscCall(ISGetIndices(isrow, &irow));
301   PetscCall(ISGetIndices(iscol_local, &icol));
302   PetscCall(ISGetLocalSize(isrow, &nrows));
303   PetscCall(ISGetLocalSize(iscol, &ncols));
304   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
305 
306   /* No parallel redistribution currently supported! Should really check each index set
307      to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
308      original matrix! */
309 
310   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
311   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
312 
313   /* Check submatrix call */
314   if (scall == MAT_REUSE_MATRIX) {
315     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
316     /* Really need to test rows and column sizes! */
317     newmat = *B;
318   } else {
319     /* Create and fill new matrix */
320     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
321     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
322     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
323     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
324   }
325 
326   /* Now extract the data pointers and do the copy, column at a time */
327   newmatd = (Mat_MPIDense *)newmat->data;
328   PetscCall(MatDenseGetArray(newmatd->A, &bv));
329   PetscCall(MatDenseGetArrayRead(mat->A, &v));
330   PetscCall(MatDenseGetLDA(mat->A, &lda));
331   for (i = 0; i < Ncols; i++) {
332     const PetscScalar *av = v + lda * icol[i];
333     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
334   }
335   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
336   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
337 
338   /* Assemble the matrices so that the correct flags are set */
339   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
340   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
341 
342   /* Free work space */
343   PetscCall(ISRestoreIndices(isrow, &irow));
344   PetscCall(ISRestoreIndices(iscol_local, &icol));
345   PetscCall(ISDestroy(&iscol_local));
346   *B = newmat;
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
392 {
393   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
394   PetscInt      i, *row, *col, flg, j, rstart, ncols;
395   PetscMPIInt   n;
396   PetscScalar  *val;
397 
398   PetscFunctionBegin;
399   if (!mdn->donotstash && !mat->nooffprocentries) {
400     /*  wait on receives */
401     while (1) {
402       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
403       if (!flg) break;
404 
405       for (i = 0; i < n;) {
406         /* Now identify the consecutive vals belonging to the same row */
407         for (j = i, rstart = row[j]; j < n; j++) {
408           if (row[j] != rstart) break;
409         }
410         if (j < n) ncols = j - i;
411         else ncols = n - i;
412         /* Now assemble all these values with a single function call */
413         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
414         i = j;
415       }
416     }
417     PetscCall(MatStashScatterEnd_Private(&mat->stash));
418   }
419 
420   PetscCall(MatAssemblyBegin(mdn->A, mode));
421   PetscCall(MatAssemblyEnd(mdn->A, mode));
422   PetscFunctionReturn(PETSC_SUCCESS);
423 }
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
434 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
435 {
436   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
437   PetscInt      i, len, *lrows;
438 
439   PetscFunctionBegin;
440   /* get locally owned rows */
441   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
442   /* fix right hand side if needed */
443   if (x && b) {
444     const PetscScalar *xx;
445     PetscScalar       *bb;
446 
447     PetscCall(VecGetArrayRead(x, &xx));
448     PetscCall(VecGetArrayWrite(b, &bb));
449     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
450     PetscCall(VecRestoreArrayRead(x, &xx));
451     PetscCall(VecRestoreArrayWrite(b, &bb));
452   }
453   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
454   if (diag != 0.0) {
455     Vec d;
456 
457     PetscCall(MatCreateVecs(A, NULL, &d));
458     PetscCall(VecSet(d, diag));
459     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
460     PetscCall(VecDestroy(&d));
461   }
462   PetscCall(PetscFree(lrows));
463   PetscFunctionReturn(PETSC_SUCCESS);
464 }
465 
466 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
470 
471 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
472 {
473   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
474   const PetscScalar *ax;
475   PetscScalar       *ay;
476   PetscMemType       axmtype, aymtype;
477 
478   PetscFunctionBegin;
479   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
480   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
481   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
482   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
483   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
484   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
485   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
486   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
487   PetscFunctionReturn(PETSC_SUCCESS);
488 }
489 
490 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
491 {
492   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
493   const PetscScalar *ax;
494   PetscScalar       *ay;
495   PetscMemType       axmtype, aymtype;
496 
497   PetscFunctionBegin;
498   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
499   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
500   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
501   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
502   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
503   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
504   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
505   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
510 {
511   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
512   const PetscScalar *ax;
513   PetscScalar       *ay;
514   PetscMemType       axmtype, aymtype;
515 
516   PetscFunctionBegin;
517   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
518   PetscCall(VecSet(yy, 0.0));
519   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
520   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
521   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
522   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
523   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
524   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
525   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
530 {
531   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
532   const PetscScalar *ax;
533   PetscScalar       *ay;
534   PetscMemType       axmtype, aymtype;
535 
536   PetscFunctionBegin;
537   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
538   PetscCall(VecCopy(yy, zz));
539   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
540   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
541   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
542   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
543   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
544   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
545   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
550 {
551   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
553   PetscScalar       *x;
554   const PetscScalar *av;
555 
556   PetscFunctionBegin;
557   PetscCall(VecGetArray(v, &x));
558   PetscCall(VecGetSize(v, &ng));
559   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
560   PetscCall(VecGetLocalSize(v, &nl));
561   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
562   radd = A->rmap->rstart * m;
563   PetscCall(MatDenseGetArrayRead(a->A, &av));
564   PetscCall(MatDenseGetLDA(a->A, &lda));
565   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
566   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
567   PetscCall(PetscArrayzero(x + i, nl - i));
568   PetscCall(VecRestoreArray(v, &x));
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
572 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
573 {
574   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
575 
576   PetscFunctionBegin;
577   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
578   PetscCall(MatStashDestroy_Private(&mat->stash));
579   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
580   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
581   PetscCall(MatDestroy(&mdn->A));
582   PetscCall(VecDestroy(&mdn->lvec));
583   PetscCall(PetscSFDestroy(&mdn->Mvctx));
584   PetscCall(VecDestroy(&mdn->cvec));
585   PetscCall(MatDestroy(&mdn->cmat));
586 
587   PetscCall(PetscFree(mat->data));
588   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
589 
590   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
591   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
592   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
593   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
594   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
596   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
597   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
598   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
599   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
600   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
601   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
602   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
603 #if defined(PETSC_HAVE_ELEMENTAL)
604   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
605 #endif
606 #if defined(PETSC_HAVE_SCALAPACK)
607   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
608 #endif
609   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
610   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
611   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
612 #if defined(PETSC_HAVE_CUDA)
613   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
614   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
615   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
616   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
617   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
618   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
619   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
620   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
621   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
622   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
623   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
624   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
625   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
626   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
627   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
628   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
629   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
630   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
631 #endif
632 #if defined(PETSC_HAVE_HIP)
633   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
634   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
635   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
636   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
637   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
638   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
639   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
640   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
641   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
642   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
643   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
644   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
645   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
646   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
647   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
648   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
649   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
650   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
651 #endif
652   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
662 
663   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664   PetscFunctionReturn(PETSC_SUCCESS);
665 }
666 
667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
668 
669 #include <petscdraw.h>
670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671 {
672   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
673   PetscMPIInt       rank;
674   PetscViewerType   vtype;
675   PetscBool         iascii, isdraw;
676   PetscViewer       sviewer;
677   PetscViewerFormat format;
678 
679   PetscFunctionBegin;
680   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683   if (iascii) {
684     PetscCall(PetscViewerGetType(viewer, &vtype));
685     PetscCall(PetscViewerGetFormat(viewer, &format));
686     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687       MatInfo info;
688       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691                                                    (PetscInt)info.memory));
692       PetscCall(PetscViewerFlush(viewer));
693       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695       PetscFunctionReturn(PETSC_SUCCESS);
696     } else if (format == PETSC_VIEWER_ASCII_INFO) {
697       PetscFunctionReturn(PETSC_SUCCESS);
698     }
699   } else if (isdraw) {
700     PetscDraw draw;
701     PetscBool isnull;
702 
703     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704     PetscCall(PetscDrawIsNull(draw, &isnull));
705     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706   }
707 
708   {
709     /* assemble the entire matrix onto first processor. */
710     Mat          A;
711     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712     PetscInt    *cols;
713     PetscScalar *vals;
714 
715     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716     if (rank == 0) {
717       PetscCall(MatSetSizes(A, M, N, M, N));
718     } else {
719       PetscCall(MatSetSizes(A, 0, 0, M, N));
720     }
721     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722     PetscCall(MatSetType(A, MATMPIDENSE));
723     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
724 
725     /* Copy the matrix ... This isn't the most efficient means,
726        but it's quick for now */
727     A->insertmode = INSERT_VALUES;
728 
729     row = mat->rmap->rstart;
730     m   = mdn->A->rmap->n;
731     for (i = 0; i < m; i++) {
732       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735       row++;
736     }
737 
738     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741     if (rank == 0) {
742       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744     }
745     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746     PetscCall(PetscViewerFlush(viewer));
747     PetscCall(MatDestroy(&A));
748   }
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753 {
754   PetscBool iascii, isbinary, isdraw, issocket;
755 
756   PetscFunctionBegin;
757   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
761 
762   if (iascii || issocket || isdraw) {
763     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765   PetscFunctionReturn(PETSC_SUCCESS);
766 }
767 
768 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769 {
770   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
771   Mat            mdn = mat->A;
772   PetscLogDouble isend[5], irecv[5];
773 
774   PetscFunctionBegin;
775   info->block_size = 1.0;
776 
777   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
778 
779   isend[0] = info->nz_used;
780   isend[1] = info->nz_allocated;
781   isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;
783   isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   } else if (flag == MAT_GLOBAL_SUM) {
799     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
800 
801     info->nz_used      = irecv[0];
802     info->nz_allocated = irecv[1];
803     info->nz_unneeded  = irecv[2];
804     info->memory       = irecv[3];
805     info->mallocs      = irecv[4];
806   }
807   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
808   info->fill_ratio_needed = 0;
809   info->factor_mallocs    = 0;
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
813 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814 {
815   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     MatCheckPreallocated(A, 1);
823     PetscCall(MatSetOption(a->A, op, flg));
824     break;
825   case MAT_ROW_ORIENTED:
826     MatCheckPreallocated(A, 1);
827     a->roworiented = flg;
828     PetscCall(MatSetOption(a->A, op, flg));
829     break;
830   case MAT_FORCE_DIAGONAL_ENTRIES:
831   case MAT_KEEP_NONZERO_PATTERN:
832   case MAT_USE_HASH_TABLE:
833   case MAT_SORTED_FULL:
834     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835     break;
836   case MAT_IGNORE_OFF_PROC_ENTRIES:
837     a->donotstash = flg;
838     break;
839   case MAT_SYMMETRIC:
840   case MAT_STRUCTURALLY_SYMMETRIC:
841   case MAT_HERMITIAN:
842   case MAT_SYMMETRY_ETERNAL:
843   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844   case MAT_SPD:
845   case MAT_IGNORE_LOWER_TRIANGULAR:
846   case MAT_IGNORE_ZERO_ENTRIES:
847   case MAT_SPD_ETERNAL:
848     /* if the diagonal matrix is square it inherits some of the properties above */
849     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850     break;
851   default:
852     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853   }
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858 {
859   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
860   const PetscScalar *l;
861   PetscScalar        x, *v, *vv, *r;
862   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
863 
864   PetscFunctionBegin;
865   PetscCall(MatDenseGetArray(mdn->A, &vv));
866   PetscCall(MatDenseGetLDA(mdn->A, &lda));
867   PetscCall(MatGetLocalSize(A, &s2, &s3));
868   if (ll) {
869     PetscCall(VecGetLocalSize(ll, &s2a));
870     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871     PetscCall(VecGetArrayRead(ll, &l));
872     for (i = 0; i < m; i++) {
873       x = l[i];
874       v = vv + i;
875       for (j = 0; j < n; j++) {
876         (*v) *= x;
877         v += lda;
878       }
879     }
880     PetscCall(VecRestoreArrayRead(ll, &l));
881     PetscCall(PetscLogFlops(1.0 * n * m));
882   }
883   if (rr) {
884     const PetscScalar *ar;
885 
886     PetscCall(VecGetLocalSize(rr, &s3a));
887     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
888     PetscCall(VecGetArrayRead(rr, &ar));
889     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890     PetscCall(VecGetArray(mdn->lvec, &r));
891     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893     PetscCall(VecRestoreArrayRead(rr, &ar));
894     for (i = 0; i < n; i++) {
895       x = r[i];
896       v = vv + i * lda;
897       for (j = 0; j < m; j++) (*v++) *= x;
898     }
899     PetscCall(VecRestoreArray(mdn->lvec, &r));
900     PetscCall(PetscLogFlops(1.0 * n * m));
901   }
902   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903   PetscFunctionReturn(PETSC_SUCCESS);
904 }
905 
906 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907 {
908   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
909   PetscInt           i, j;
910   PetscMPIInt        size;
911   PetscReal          sum = 0.0;
912   const PetscScalar *av, *v;
913 
914   PetscFunctionBegin;
915   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916   v = av;
917   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918   if (size == 1) {
919     PetscCall(MatNorm(mdn->A, type, nrm));
920   } else {
921     if (type == NORM_FROBENIUS) {
922       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923         sum += PetscRealPart(PetscConj(*v) * (*v));
924         v++;
925       }
926       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927       *nrm = PetscSqrtReal(*nrm);
928       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929     } else if (type == NORM_1) {
930       PetscReal *tmp, *tmp2;
931       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932       *nrm = 0.0;
933       v    = av;
934       for (j = 0; j < mdn->A->cmap->n; j++) {
935         for (i = 0; i < mdn->A->rmap->n; i++) {
936           tmp[j] += PetscAbsScalar(*v);
937           v++;
938         }
939       }
940       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941       for (j = 0; j < A->cmap->N; j++) {
942         if (tmp2[j] > *nrm) *nrm = tmp2[j];
943       }
944       PetscCall(PetscFree2(tmp, tmp2));
945       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946     } else if (type == NORM_INFINITY) { /* max row norm */
947       PetscReal ntemp;
948       PetscCall(MatNorm(mdn->A, type, &ntemp));
949       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951   }
952   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953   PetscFunctionReturn(PETSC_SUCCESS);
954 }
955 
956 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957 {
958   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959   Mat           B;
960   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961   PetscInt      j, i, lda;
962   PetscScalar  *v;
963 
964   PetscFunctionBegin;
965   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971   } else B = *matout;
972 
973   m = a->A->rmap->n;
974   n = a->A->cmap->n;
975   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976   PetscCall(MatDenseGetLDA(a->A, &lda));
977   PetscCall(PetscMalloc1(m, &rwork));
978   for (i = 0; i < m; i++) rwork[i] = rstart + i;
979   for (j = 0; j < n; j++) {
980     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981     v += lda;
982   }
983   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984   PetscCall(PetscFree(rwork));
985   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988     *matout = B;
989   } else {
990     PetscCall(MatHeaderMerge(A, &B));
991   }
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
997 
998 static PetscErrorCode MatSetUp_MPIDense(Mat A)
999 {
1000   PetscFunctionBegin;
1001   PetscCall(PetscLayoutSetUp(A->rmap));
1002   PetscCall(PetscLayoutSetUp(A->cmap));
1003   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008 {
1009   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1010 
1011   PetscFunctionBegin;
1012   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017 {
1018   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1019 
1020   PetscFunctionBegin;
1021   PetscCall(MatConjugate(a->A));
1022   PetscFunctionReturn(PETSC_SUCCESS);
1023 }
1024 
1025 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1026 {
1027   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(MatRealPart(a->A));
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033 
1034 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035 {
1036   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(MatImaginaryPart(a->A));
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042 
1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044 {
1045   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1046 
1047   PetscFunctionBegin;
1048   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051   PetscFunctionReturn(PETSC_SUCCESS);
1052 }
1053 
1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1055 
1056 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057 {
1058   PetscInt      i, m, n;
1059   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060   PetscReal    *work;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatGetSize(A, &m, &n));
1064   PetscCall(PetscMalloc1(n, &work));
1065   if (type == REDUCTION_MEAN_REALPART) {
1066     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069   } else {
1070     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071   }
1072   if (type == NORM_2) {
1073     for (i = 0; i < n; i++) work[i] *= work[i];
1074   }
1075   if (type == NORM_INFINITY) {
1076     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077   } else {
1078     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079   }
1080   PetscCall(PetscFree(work));
1081   if (type == NORM_2) {
1082     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084     for (i = 0; i < n; i++) reductions[i] /= m;
1085   }
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1090 {
1091   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatSetRandom(d->A, rctx));
1095 #if defined(PETSC_HAVE_DEVICE)
1096   x->offloadmask = d->A->offloadmask;
1097 #endif
1098   PetscFunctionReturn(PETSC_SUCCESS);
1099 }
1100 
1101 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1102 {
1103   PetscFunctionBegin;
1104   *missing = PETSC_FALSE;
1105   PetscFunctionReturn(PETSC_SUCCESS);
1106 }
1107 
1108 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1109 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1110 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1111 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1112 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1113 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1114 
1115 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1116                                        MatGetRow_MPIDense,
1117                                        MatRestoreRow_MPIDense,
1118                                        MatMult_MPIDense,
1119                                        /*  4*/ MatMultAdd_MPIDense,
1120                                        MatMultTranspose_MPIDense,
1121                                        MatMultTransposeAdd_MPIDense,
1122                                        NULL,
1123                                        NULL,
1124                                        NULL,
1125                                        /* 10*/ NULL,
1126                                        NULL,
1127                                        NULL,
1128                                        NULL,
1129                                        MatTranspose_MPIDense,
1130                                        /* 15*/ MatGetInfo_MPIDense,
1131                                        MatEqual_MPIDense,
1132                                        MatGetDiagonal_MPIDense,
1133                                        MatDiagonalScale_MPIDense,
1134                                        MatNorm_MPIDense,
1135                                        /* 20*/ MatAssemblyBegin_MPIDense,
1136                                        MatAssemblyEnd_MPIDense,
1137                                        MatSetOption_MPIDense,
1138                                        MatZeroEntries_MPIDense,
1139                                        /* 24*/ MatZeroRows_MPIDense,
1140                                        NULL,
1141                                        NULL,
1142                                        NULL,
1143                                        NULL,
1144                                        /* 29*/ MatSetUp_MPIDense,
1145                                        NULL,
1146                                        NULL,
1147                                        MatGetDiagonalBlock_MPIDense,
1148                                        NULL,
1149                                        /* 34*/ MatDuplicate_MPIDense,
1150                                        NULL,
1151                                        NULL,
1152                                        NULL,
1153                                        NULL,
1154                                        /* 39*/ MatAXPY_MPIDense,
1155                                        MatCreateSubMatrices_MPIDense,
1156                                        NULL,
1157                                        MatGetValues_MPIDense,
1158                                        MatCopy_MPIDense,
1159                                        /* 44*/ NULL,
1160                                        MatScale_MPIDense,
1161                                        MatShift_MPIDense,
1162                                        NULL,
1163                                        NULL,
1164                                        /* 49*/ MatSetRandom_MPIDense,
1165                                        NULL,
1166                                        NULL,
1167                                        NULL,
1168                                        NULL,
1169                                        /* 54*/ NULL,
1170                                        NULL,
1171                                        NULL,
1172                                        NULL,
1173                                        NULL,
1174                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1175                                        MatDestroy_MPIDense,
1176                                        MatView_MPIDense,
1177                                        NULL,
1178                                        NULL,
1179                                        /* 64*/ NULL,
1180                                        NULL,
1181                                        NULL,
1182                                        NULL,
1183                                        NULL,
1184                                        /* 69*/ NULL,
1185                                        NULL,
1186                                        NULL,
1187                                        NULL,
1188                                        NULL,
1189                                        /* 74*/ NULL,
1190                                        NULL,
1191                                        NULL,
1192                                        NULL,
1193                                        NULL,
1194                                        /* 79*/ NULL,
1195                                        NULL,
1196                                        NULL,
1197                                        NULL,
1198                                        /* 83*/ MatLoad_MPIDense,
1199                                        NULL,
1200                                        NULL,
1201                                        NULL,
1202                                        NULL,
1203                                        NULL,
1204                                        /* 89*/ NULL,
1205                                        NULL,
1206                                        NULL,
1207                                        NULL,
1208                                        NULL,
1209                                        /* 94*/ NULL,
1210                                        NULL,
1211                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1212                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1213                                        NULL,
1214                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1215                                        NULL,
1216                                        NULL,
1217                                        MatConjugate_MPIDense,
1218                                        NULL,
1219                                        /*104*/ NULL,
1220                                        MatRealPart_MPIDense,
1221                                        MatImaginaryPart_MPIDense,
1222                                        NULL,
1223                                        NULL,
1224                                        /*109*/ NULL,
1225                                        NULL,
1226                                        NULL,
1227                                        MatGetColumnVector_MPIDense,
1228                                        MatMissingDiagonal_MPIDense,
1229                                        /*114*/ NULL,
1230                                        NULL,
1231                                        NULL,
1232                                        NULL,
1233                                        NULL,
1234                                        /*119*/ NULL,
1235                                        NULL,
1236                                        NULL,
1237                                        NULL,
1238                                        NULL,
1239                                        /*124*/ NULL,
1240                                        MatGetColumnReductions_MPIDense,
1241                                        NULL,
1242                                        NULL,
1243                                        NULL,
1244                                        /*129*/ NULL,
1245                                        NULL,
1246                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1247                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1248                                        NULL,
1249                                        /*134*/ NULL,
1250                                        NULL,
1251                                        NULL,
1252                                        NULL,
1253                                        NULL,
1254                                        /*139*/ NULL,
1255                                        NULL,
1256                                        NULL,
1257                                        NULL,
1258                                        NULL,
1259                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1260                                        /*145*/ NULL,
1261                                        NULL,
1262                                        NULL,
1263                                        NULL,
1264                                        NULL,
1265                                        /*150*/ NULL,
1266                                        NULL};
1267 
1268 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1269 {
1270   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1271   MatType       mtype = MATSEQDENSE;
1272 
1273   PetscFunctionBegin;
1274   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1275   PetscCall(PetscLayoutSetUp(mat->rmap));
1276   PetscCall(PetscLayoutSetUp(mat->cmap));
1277   if (!a->A) {
1278     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1279     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1280   }
1281 #if defined(PETSC_HAVE_CUDA)
1282   PetscBool iscuda;
1283   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1284   if (iscuda) mtype = MATSEQDENSECUDA;
1285 #endif
1286 #if defined(PETSC_HAVE_HIP)
1287   PetscBool iship;
1288   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1289   if (iship) mtype = MATSEQDENSEHIP;
1290 #endif
1291   PetscCall(MatSetType(a->A, mtype));
1292   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1293 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1294   mat->offloadmask = a->A->offloadmask;
1295 #endif
1296   mat->preallocated = PETSC_TRUE;
1297   mat->assembled    = PETSC_TRUE;
1298   PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300 
1301 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1302 {
1303   Mat B, C;
1304 
1305   PetscFunctionBegin;
1306   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1307   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1308   PetscCall(MatDestroy(&C));
1309   if (reuse == MAT_REUSE_MATRIX) {
1310     C = *newmat;
1311   } else C = NULL;
1312   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1313   PetscCall(MatDestroy(&B));
1314   if (reuse == MAT_INPLACE_MATRIX) {
1315     PetscCall(MatHeaderReplace(A, &C));
1316   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1317   PetscFunctionReturn(PETSC_SUCCESS);
1318 }
1319 
1320 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1321 {
1322   Mat B, C;
1323 
1324   PetscFunctionBegin;
1325   PetscCall(MatDenseGetLocalMatrix(A, &C));
1326   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1327   if (reuse == MAT_REUSE_MATRIX) {
1328     C = *newmat;
1329   } else C = NULL;
1330   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1331   PetscCall(MatDestroy(&B));
1332   if (reuse == MAT_INPLACE_MATRIX) {
1333     PetscCall(MatHeaderReplace(A, &C));
1334   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1335   PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337 
1338 #if defined(PETSC_HAVE_ELEMENTAL)
1339 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1340 {
1341   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1342   Mat           mat_elemental;
1343   PetscScalar  *v;
1344   PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1345 
1346   PetscFunctionBegin;
1347   if (reuse == MAT_REUSE_MATRIX) {
1348     mat_elemental = *newmat;
1349     PetscCall(MatZeroEntries(*newmat));
1350   } else {
1351     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1352     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1353     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1354     PetscCall(MatSetUp(mat_elemental));
1355     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1356   }
1357 
1358   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1359   for (i = 0; i < N; i++) cols[i] = i;
1360   for (i = 0; i < m; i++) rows[i] = rstart + i;
1361 
1362   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1363   PetscCall(MatDenseGetArray(A, &v));
1364   PetscCall(MatDenseGetLDA(a->A, &lda));
1365   if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1366   else {
1367     for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1368   }
1369   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1370   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1371   PetscCall(MatDenseRestoreArray(A, &v));
1372   PetscCall(PetscFree2(rows, cols));
1373 
1374   if (reuse == MAT_INPLACE_MATRIX) {
1375     PetscCall(MatHeaderReplace(A, &mat_elemental));
1376   } else {
1377     *newmat = mat_elemental;
1378   }
1379   PetscFunctionReturn(PETSC_SUCCESS);
1380 }
1381 #endif
1382 
1383 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1384 {
1385   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1386 
1387   PetscFunctionBegin;
1388   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1389   PetscFunctionReturn(PETSC_SUCCESS);
1390 }
1391 
1392 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1393 {
1394   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1395 
1396   PetscFunctionBegin;
1397   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1402 {
1403   Mat_MPIDense *mat;
1404   PetscInt      m, nloc, N;
1405 
1406   PetscFunctionBegin;
1407   PetscCall(MatGetSize(inmat, &m, &N));
1408   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1409   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1410     PetscInt sum;
1411 
1412     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1413     /* Check sum(n) = N */
1414     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1415     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1416 
1417     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1418     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1419   }
1420 
1421   /* numeric phase */
1422   mat = (Mat_MPIDense *)(*outmat)->data;
1423   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1424   PetscFunctionReturn(PETSC_SUCCESS);
1425 }
1426 
1427 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1428 {
1429   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1430   PetscInt      lda;
1431 
1432   PetscFunctionBegin;
1433   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1434   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1435   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1436   a->vecinuse = col + 1;
1437   PetscCall(MatDenseGetLDA(a->A, &lda));
1438   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1439   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1440   *v = a->cvec;
1441   PetscFunctionReturn(PETSC_SUCCESS);
1442 }
1443 
1444 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1445 {
1446   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1447 
1448   PetscFunctionBegin;
1449   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1450   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1451   a->vecinuse = 0;
1452   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1453   PetscCall(VecResetArray(a->cvec));
1454   if (v) *v = NULL;
1455   PetscFunctionReturn(PETSC_SUCCESS);
1456 }
1457 
1458 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1459 {
1460   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1461   PetscInt      lda;
1462 
1463   PetscFunctionBegin;
1464   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1465   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1466   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1467   a->vecinuse = col + 1;
1468   PetscCall(MatDenseGetLDA(a->A, &lda));
1469   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1470   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1471   PetscCall(VecLockReadPush(a->cvec));
1472   *v = a->cvec;
1473   PetscFunctionReturn(PETSC_SUCCESS);
1474 }
1475 
1476 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1477 {
1478   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1479 
1480   PetscFunctionBegin;
1481   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1482   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1483   a->vecinuse = 0;
1484   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1485   PetscCall(VecLockReadPop(a->cvec));
1486   PetscCall(VecResetArray(a->cvec));
1487   if (v) *v = NULL;
1488   PetscFunctionReturn(PETSC_SUCCESS);
1489 }
1490 
1491 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1492 {
1493   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1494   PetscInt      lda;
1495 
1496   PetscFunctionBegin;
1497   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1498   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1499   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1500   a->vecinuse = col + 1;
1501   PetscCall(MatDenseGetLDA(a->A, &lda));
1502   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1503   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1504   *v = a->cvec;
1505   PetscFunctionReturn(PETSC_SUCCESS);
1506 }
1507 
1508 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1509 {
1510   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1511 
1512   PetscFunctionBegin;
1513   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1514   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1515   a->vecinuse = 0;
1516   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1517   PetscCall(VecResetArray(a->cvec));
1518   if (v) *v = NULL;
1519   PetscFunctionReturn(PETSC_SUCCESS);
1520 }
1521 
1522 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1523 {
1524   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1525   Mat_MPIDense *c;
1526   MPI_Comm      comm;
1527   PetscInt      pbegin, pend;
1528 
1529   PetscFunctionBegin;
1530   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1531   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1532   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1533   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1534   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1535   if (!a->cmat) {
1536     PetscCall(MatCreate(comm, &a->cmat));
1537     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1538     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1539     else {
1540       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1541       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1542       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1543     }
1544     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1545     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1546   } else {
1547     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1548     if (same && a->cmat->rmap->N != A->rmap->N) {
1549       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1550       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1551     }
1552     if (!same) {
1553       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1554       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1555       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1556       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1557       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1558     }
1559     if (cend - cbegin != a->cmat->cmap->N) {
1560       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1561       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1562       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1563       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1564     }
1565   }
1566   c = (Mat_MPIDense *)a->cmat->data;
1567   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1568   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1569 
1570   a->cmat->preallocated = PETSC_TRUE;
1571   a->cmat->assembled    = PETSC_TRUE;
1572 #if defined(PETSC_HAVE_DEVICE)
1573   a->cmat->offloadmask = c->A->offloadmask;
1574 #endif
1575   a->matinuse = cbegin + 1;
1576   *v          = a->cmat;
1577   PetscFunctionReturn(PETSC_SUCCESS);
1578 }
1579 
1580 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1581 {
1582   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1583   Mat_MPIDense *c;
1584 
1585   PetscFunctionBegin;
1586   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1587   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1588   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1589   a->matinuse = 0;
1590   c           = (Mat_MPIDense *)a->cmat->data;
1591   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1592   if (v) *v = NULL;
1593 #if defined(PETSC_HAVE_DEVICE)
1594   A->offloadmask = a->A->offloadmask;
1595 #endif
1596   PetscFunctionReturn(PETSC_SUCCESS);
1597 }
1598 
1599 /*MC
1600    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1601 
1602    Options Database Key:
1603 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1604 
1605   Level: beginner
1606 
1607 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1608 M*/
1609 PetscErrorCode MatCreate_MPIDense(Mat mat)
1610 {
1611   Mat_MPIDense *a;
1612 
1613   PetscFunctionBegin;
1614   PetscCall(PetscNew(&a));
1615   mat->data   = (void *)a;
1616   mat->ops[0] = MatOps_Values;
1617 
1618   mat->insertmode = NOT_SET_VALUES;
1619 
1620   /* build cache for off array entries formed */
1621   a->donotstash = PETSC_FALSE;
1622 
1623   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1624 
1625   /* stuff used for matrix vector multiply */
1626   a->lvec        = NULL;
1627   a->Mvctx       = NULL;
1628   a->roworiented = PETSC_TRUE;
1629 
1630   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1631   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1632   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1633   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1634   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1635   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1636   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1637   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1638   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1639   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1640   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1641   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1642   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1643   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1644   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1645   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1646   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1647   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1648   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1649   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1650   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1651 #if defined(PETSC_HAVE_ELEMENTAL)
1652   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1653 #endif
1654 #if defined(PETSC_HAVE_SCALAPACK)
1655   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1656 #endif
1657 #if defined(PETSC_HAVE_CUDA)
1658   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1659 #endif
1660   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1661   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1662   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1663 #if defined(PETSC_HAVE_CUDA)
1664   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1666 #endif
1667 #if defined(PETSC_HAVE_HIP)
1668   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1669   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1670   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1671 #endif
1672   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1673   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1674   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1675   PetscFunctionReturn(PETSC_SUCCESS);
1676 }
1677 
1678 /*MC
1679    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1680 
1681    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1682    and `MATMPIDENSE` otherwise.
1683 
1684    Options Database Key:
1685 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1686 
1687   Level: beginner
1688 
1689 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1690 M*/
1691 
1692 /*@C
1693   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1694 
1695   Collective
1696 
1697   Input Parameters:
1698 + B    - the matrix
1699 - data - optional location of matrix data.  Set to `NULL` for PETSc
1700    to control all matrix memory allocation.
1701 
1702   Level: intermediate
1703 
1704   Notes:
1705   The dense format is fully compatible with standard Fortran
1706   storage by columns.
1707 
1708   The data input variable is intended primarily for Fortran programmers
1709   who wish to allocate their own matrix memory space.  Most users should
1710   set `data` to `NULL`.
1711 
1712 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1713 @*/
1714 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1715 {
1716   PetscFunctionBegin;
1717   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1718   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1719   PetscFunctionReturn(PETSC_SUCCESS);
1720 }
1721 
1722 /*@
1723   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1724   array provided by the user. This is useful to avoid copying an array
1725   into a matrix
1726 
1727   Not Collective
1728 
1729   Input Parameters:
1730 + mat   - the matrix
1731 - array - the array in column major order
1732 
1733   Level: developer
1734 
1735   Note:
1736   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1737   freed when the matrix is destroyed.
1738 
1739 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1740           `MatDenseReplaceArray()`
1741 @*/
1742 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1743 {
1744   PetscFunctionBegin;
1745   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1746   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1747   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1748 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1749   mat->offloadmask = PETSC_OFFLOAD_CPU;
1750 #endif
1751   PetscFunctionReturn(PETSC_SUCCESS);
1752 }
1753 
1754 /*@
1755   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1756 
1757   Not Collective
1758 
1759   Input Parameter:
1760 . mat - the matrix
1761 
1762   Level: developer
1763 
1764   Note:
1765   You can only call this after a call to `MatDensePlaceArray()`
1766 
1767 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1768 @*/
1769 PetscErrorCode MatDenseResetArray(Mat mat)
1770 {
1771   PetscFunctionBegin;
1772   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1773   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1774   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1775   PetscFunctionReturn(PETSC_SUCCESS);
1776 }
1777 
1778 /*@
1779   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1780   array provided by the user. This is useful to avoid copying an array
1781   into a matrix
1782 
1783   Not Collective
1784 
1785   Input Parameters:
1786 + mat   - the matrix
1787 - array - the array in column major order
1788 
1789   Level: developer
1790 
1791   Note:
1792   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1793   freed by the user. It will be freed when the matrix is destroyed.
1794 
1795 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1796 @*/
1797 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1798 {
1799   PetscFunctionBegin;
1800   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1801   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1802   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1803 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1804   mat->offloadmask = PETSC_OFFLOAD_CPU;
1805 #endif
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
1809 /*@C
1810   MatCreateDense - Creates a matrix in `MATDENSE` format.
1811 
1812   Collective
1813 
1814   Input Parameters:
1815 + comm - MPI communicator
1816 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1817 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1818 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1819 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1820 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1821    to control all matrix memory allocation.
1822 
1823   Output Parameter:
1824 . A - the matrix
1825 
1826   Level: intermediate
1827 
1828   Notes:
1829   The dense format is fully compatible with standard Fortran
1830   storage by columns.
1831 
1832   Although local portions of the matrix are stored in column-major
1833   order, the matrix is partitioned across MPI ranks by row.
1834 
1835   The data input variable is intended primarily for Fortran programmers
1836   who wish to allocate their own matrix memory space.  Most users should
1837   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1838 
1839   The user MUST specify either the local or global matrix dimensions
1840   (possibly both).
1841 
1842 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1843 @*/
1844 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1845 {
1846   PetscFunctionBegin;
1847   PetscCall(MatCreate(comm, A));
1848   PetscCall(MatSetSizes(*A, m, n, M, N));
1849   PetscCall(MatSetType(*A, MATDENSE));
1850   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1851   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1852   PetscFunctionReturn(PETSC_SUCCESS);
1853 }
1854 
1855 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1856 {
1857   Mat           mat;
1858   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1859 
1860   PetscFunctionBegin;
1861   *newmat = NULL;
1862   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1863   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1864   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1865   a = (Mat_MPIDense *)mat->data;
1866 
1867   mat->factortype   = A->factortype;
1868   mat->assembled    = PETSC_TRUE;
1869   mat->preallocated = PETSC_TRUE;
1870 
1871   mat->insertmode = NOT_SET_VALUES;
1872   a->donotstash   = oldmat->donotstash;
1873 
1874   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1875   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1876 
1877   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1878 
1879   *newmat = mat;
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1884 {
1885   PetscBool isbinary;
1886 #if defined(PETSC_HAVE_HDF5)
1887   PetscBool ishdf5;
1888 #endif
1889 
1890   PetscFunctionBegin;
1891   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1892   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1893   /* force binary viewer to load .info file if it has not yet done so */
1894   PetscCall(PetscViewerSetUp(viewer));
1895   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1896 #if defined(PETSC_HAVE_HDF5)
1897   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1898 #endif
1899   if (isbinary) {
1900     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1901 #if defined(PETSC_HAVE_HDF5)
1902   } else if (ishdf5) {
1903     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
1904 #endif
1905   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1906   PetscFunctionReturn(PETSC_SUCCESS);
1907 }
1908 
1909 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1910 {
1911   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1912   Mat           a, b;
1913 
1914   PetscFunctionBegin;
1915   a = matA->A;
1916   b = matB->A;
1917   PetscCall(MatEqual(a, b, flag));
1918   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1919   PetscFunctionReturn(PETSC_SUCCESS);
1920 }
1921 
1922 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1923 {
1924   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1925 
1926   PetscFunctionBegin;
1927   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1928   PetscCall(MatDestroy(&atb->atb));
1929   PetscCall(PetscFree(atb));
1930   PetscFunctionReturn(PETSC_SUCCESS);
1931 }
1932 
1933 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1934 {
1935   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1936 
1937   PetscFunctionBegin;
1938   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1939   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1940   PetscCall(PetscFree(abt));
1941   PetscFunctionReturn(PETSC_SUCCESS);
1942 }
1943 
1944 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1945 {
1946   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
1947   Mat_TransMatMultDense *atb;
1948   MPI_Comm               comm;
1949   PetscMPIInt            size, *recvcounts;
1950   PetscScalar           *carray, *sendbuf;
1951   const PetscScalar     *atbarray;
1952   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
1953   const PetscInt        *ranges;
1954 
1955   PetscFunctionBegin;
1956   MatCheckProduct(C, 3);
1957   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
1958   atb        = (Mat_TransMatMultDense *)C->product->data;
1959   recvcounts = atb->recvcounts;
1960   sendbuf    = atb->sendbuf;
1961 
1962   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1963   PetscCallMPI(MPI_Comm_size(comm, &size));
1964 
1965   /* compute atbarray = aseq^T * bseq */
1966   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
1967 
1968   PetscCall(MatGetOwnershipRanges(C, &ranges));
1969 
1970   /* arrange atbarray into sendbuf */
1971   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
1972   PetscCall(MatDenseGetLDA(atb->atb, &lda));
1973   for (proc = 0, k = 0; proc < size; proc++) {
1974     for (j = 0; j < cN; j++) {
1975       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
1976     }
1977   }
1978   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
1979 
1980   /* sum all atbarray to local values of C */
1981   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
1982   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
1983   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
1984   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1985   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1986   PetscFunctionReturn(PETSC_SUCCESS);
1987 }
1988 
1989 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1990 {
1991   MPI_Comm               comm;
1992   PetscMPIInt            size;
1993   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1994   Mat_TransMatMultDense *atb;
1995   PetscBool              cisdense = PETSC_FALSE;
1996   PetscInt               i;
1997   const PetscInt        *ranges;
1998 
1999   PetscFunctionBegin;
2000   MatCheckProduct(C, 4);
2001   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2002   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2003   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2004     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2005   }
2006 
2007   /* create matrix product C */
2008   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2009 #if defined(PETSC_HAVE_CUDA)
2010   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2011 #endif
2012 #if defined(PETSC_HAVE_HIP)
2013   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2014 #endif
2015   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2016   PetscCall(MatSetUp(C));
2017 
2018   /* create data structure for reuse C */
2019   PetscCallMPI(MPI_Comm_size(comm, &size));
2020   PetscCall(PetscNew(&atb));
2021   cM = C->rmap->N;
2022   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2023   PetscCall(MatGetOwnershipRanges(C, &ranges));
2024   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2025 
2026   C->product->data    = atb;
2027   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2028   PetscFunctionReturn(PETSC_SUCCESS);
2029 }
2030 
2031 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2032 {
2033   MPI_Comm               comm;
2034   PetscMPIInt            i, size;
2035   PetscInt               maxRows, bufsiz;
2036   PetscMPIInt            tag;
2037   PetscInt               alg;
2038   Mat_MatTransMultDense *abt;
2039   Mat_Product           *product = C->product;
2040   PetscBool              flg;
2041 
2042   PetscFunctionBegin;
2043   MatCheckProduct(C, 4);
2044   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2045   /* check local size of A and B */
2046   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2047 
2048   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2049   alg = flg ? 0 : 1;
2050 
2051   /* setup matrix product C */
2052   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2053   PetscCall(MatSetType(C, MATMPIDENSE));
2054   PetscCall(MatSetUp(C));
2055   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2056 
2057   /* create data structure for reuse C */
2058   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2059   PetscCallMPI(MPI_Comm_size(comm, &size));
2060   PetscCall(PetscNew(&abt));
2061   abt->tag = tag;
2062   abt->alg = alg;
2063   switch (alg) {
2064   case 1: /* alg: "cyclic" */
2065     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2066     bufsiz = A->cmap->N * maxRows;
2067     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
2068     break;
2069   default: /* alg: "allgatherv" */
2070     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2071     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
2072     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2073     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2074     break;
2075   }
2076 
2077   C->product->data                = abt;
2078   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2079   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2084 {
2085   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2086   Mat_MatTransMultDense *abt;
2087   MPI_Comm               comm;
2088   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2089   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2090   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2091   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2092   const PetscScalar     *av, *bv;
2093   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2094   MPI_Request            reqs[2];
2095   const PetscInt        *ranges;
2096 
2097   PetscFunctionBegin;
2098   MatCheckProduct(C, 3);
2099   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2100   abt = (Mat_MatTransMultDense *)C->product->data;
2101   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2102   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2103   PetscCallMPI(MPI_Comm_size(comm, &size));
2104   PetscCall(MatDenseGetArrayRead(a->A, &av));
2105   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2106   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2107   PetscCall(MatDenseGetLDA(a->A, &i));
2108   PetscCall(PetscBLASIntCast(i, &alda));
2109   PetscCall(MatDenseGetLDA(b->A, &i));
2110   PetscCall(PetscBLASIntCast(i, &blda));
2111   PetscCall(MatDenseGetLDA(c->A, &i));
2112   PetscCall(PetscBLASIntCast(i, &clda));
2113   PetscCall(MatGetOwnershipRanges(B, &ranges));
2114   bn = B->rmap->n;
2115   if (blda == bn) {
2116     sendbuf = (PetscScalar *)bv;
2117   } else {
2118     sendbuf = abt->buf[0];
2119     for (k = 0, i = 0; i < cK; i++) {
2120       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2121     }
2122   }
2123   if (size > 1) {
2124     sendto   = (rank + size - 1) % size;
2125     recvfrom = (rank + size + 1) % size;
2126   } else {
2127     sendto = recvfrom = 0;
2128   }
2129   PetscCall(PetscBLASIntCast(cK, &ck));
2130   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2131   recvisfrom = rank;
2132   for (i = 0; i < size; i++) {
2133     /* we have finished receiving in sending, bufs can be read/modified */
2134     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2135     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2136 
2137     if (nextrecvisfrom != rank) {
2138       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2139       sendsiz = cK * bn;
2140       recvsiz = cK * nextbn;
2141       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2142       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2143       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2144     }
2145 
2146     /* local aseq * sendbuf^T */
2147     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2148     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2149 
2150     if (nextrecvisfrom != rank) {
2151       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2152       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2153     }
2154     bn         = nextbn;
2155     recvisfrom = nextrecvisfrom;
2156     sendbuf    = recvbuf;
2157   }
2158   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2159   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2160   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2161   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2162   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2163   PetscFunctionReturn(PETSC_SUCCESS);
2164 }
2165 
2166 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2167 {
2168   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2169   Mat_MatTransMultDense *abt;
2170   MPI_Comm               comm;
2171   PetscMPIInt            size;
2172   PetscScalar           *cv, *sendbuf, *recvbuf;
2173   const PetscScalar     *av, *bv;
2174   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2175   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2176   PetscBLASInt           cm, cn, ck, alda, clda;
2177 
2178   PetscFunctionBegin;
2179   MatCheckProduct(C, 3);
2180   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2181   abt = (Mat_MatTransMultDense *)C->product->data;
2182   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2183   PetscCallMPI(MPI_Comm_size(comm, &size));
2184   PetscCall(MatDenseGetArrayRead(a->A, &av));
2185   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2186   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2187   PetscCall(MatDenseGetLDA(a->A, &i));
2188   PetscCall(PetscBLASIntCast(i, &alda));
2189   PetscCall(MatDenseGetLDA(b->A, &blda));
2190   PetscCall(MatDenseGetLDA(c->A, &i));
2191   PetscCall(PetscBLASIntCast(i, &clda));
2192   /* copy transpose of B into buf[0] */
2193   bn      = B->rmap->n;
2194   sendbuf = abt->buf[0];
2195   recvbuf = abt->buf[1];
2196   for (k = 0, j = 0; j < bn; j++) {
2197     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2198   }
2199   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2200   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2201   PetscCall(PetscBLASIntCast(cK, &ck));
2202   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2203   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2204   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2205   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2206   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2207   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2208   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2209   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2214 {
2215   Mat_MatTransMultDense *abt;
2216 
2217   PetscFunctionBegin;
2218   MatCheckProduct(C, 3);
2219   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2220   abt = (Mat_MatTransMultDense *)C->product->data;
2221   switch (abt->alg) {
2222   case 1:
2223     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2224     break;
2225   default:
2226     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2227     break;
2228   }
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 #if defined(PETSC_HAVE_ELEMENTAL)
2233 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2234 {
2235   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2236 
2237   PetscFunctionBegin;
2238   PetscCall(MatDestroy(&ab->Ce));
2239   PetscCall(MatDestroy(&ab->Ae));
2240   PetscCall(MatDestroy(&ab->Be));
2241   PetscCall(PetscFree(ab));
2242   PetscFunctionReturn(PETSC_SUCCESS);
2243 }
2244 
2245 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2246 {
2247   Mat_MatMultDense *ab;
2248 
2249   PetscFunctionBegin;
2250   MatCheckProduct(C, 3);
2251   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2252   ab = (Mat_MatMultDense *)C->product->data;
2253   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2254   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2255   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2256   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2257   PetscFunctionReturn(PETSC_SUCCESS);
2258 }
2259 
2260 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2261 {
2262   Mat               Ae, Be, Ce;
2263   Mat_MatMultDense *ab;
2264 
2265   PetscFunctionBegin;
2266   MatCheckProduct(C, 4);
2267   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2268   /* check local size of A and B */
2269   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2270     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2271   }
2272 
2273   /* create elemental matrices Ae and Be */
2274   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2275   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2276   PetscCall(MatSetType(Ae, MATELEMENTAL));
2277   PetscCall(MatSetUp(Ae));
2278   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2279 
2280   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2281   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2282   PetscCall(MatSetType(Be, MATELEMENTAL));
2283   PetscCall(MatSetUp(Be));
2284   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2285 
2286   /* compute symbolic Ce = Ae*Be */
2287   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2288   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2289 
2290   /* setup C */
2291   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2292   PetscCall(MatSetType(C, MATDENSE));
2293   PetscCall(MatSetUp(C));
2294 
2295   /* create data structure for reuse Cdense */
2296   PetscCall(PetscNew(&ab));
2297   ab->Ae = Ae;
2298   ab->Be = Be;
2299   ab->Ce = Ce;
2300 
2301   C->product->data       = ab;
2302   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2303   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2304   PetscFunctionReturn(PETSC_SUCCESS);
2305 }
2306 #endif
2307 
2308 #if defined(PETSC_HAVE_ELEMENTAL)
2309 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2310 {
2311   PetscFunctionBegin;
2312   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2313   C->ops->productsymbolic = MatProductSymbolic_AB;
2314   PetscFunctionReturn(PETSC_SUCCESS);
2315 }
2316 #endif
2317 
2318 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2319 {
2320   Mat_Product *product = C->product;
2321   Mat          A = product->A, B = product->B;
2322 
2323   PetscFunctionBegin;
2324   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2325     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2326   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2327   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2332 {
2333   Mat_Product *product     = C->product;
2334   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2335   PetscInt     alg, nalg = 2;
2336   PetscBool    flg = PETSC_FALSE;
2337 
2338   PetscFunctionBegin;
2339   /* Set default algorithm */
2340   alg = 0; /* default is allgatherv */
2341   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2342   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2343 
2344   /* Get runtime option */
2345   if (product->api_user) {
2346     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2347     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2348     PetscOptionsEnd();
2349   } else {
2350     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2351     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2352     PetscOptionsEnd();
2353   }
2354   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2355 
2356   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2357   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2358   PetscFunctionReturn(PETSC_SUCCESS);
2359 }
2360 
2361 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2362 {
2363   Mat_Product *product = C->product;
2364 
2365   PetscFunctionBegin;
2366   switch (product->type) {
2367 #if defined(PETSC_HAVE_ELEMENTAL)
2368   case MATPRODUCT_AB:
2369     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2370     break;
2371 #endif
2372   case MATPRODUCT_AtB:
2373     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2374     break;
2375   case MATPRODUCT_ABt:
2376     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2377     break;
2378   default:
2379     break;
2380   }
2381   PetscFunctionReturn(PETSC_SUCCESS);
2382 }
2383