xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision dd8e379b23d2ef935d8131fb74f7eb73fef09263)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13   matrix that represents the operator. For sequential matrices it returns itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
/* A += alpha*I: add alpha to the locally owned diagonal entries. */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for rectangular A the diagonal stops at column N, which may fall inside this rank's row range */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* global row j is local row j - rstart; the local matrix stores all N columns,
       so the diagonal entry of row j sits in column j of the local array */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
/* Return the sequential dense matrix representing this rank's diagonal block.
   The block is created lazily, shares storage with A's local array, and is
   cached on A under the key "DiagonalBlock" so repeated calls are cheap. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the columns starting at rstart inside A's local array
       (offset m*rstart; assumes the local leading dimension equals m — TODO confirm) */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing takes a reference; drop ours so the cached block is owned by A */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
129 
/* Insert/add a logically dense m x n block of values. Locally owned rows go
   straight into the inner sequential matrix; off-process rows are stashed for
   communication during MatAssemblyBegin/End (unless donotstash is set). */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major input: row i of the block starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
      } else {
        /* column-major input: entry (i,j) of the block is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
162 
/* Read a block of values into v (row-major). Only locally owned rows can be
   queried; requesting a remote row is an error. */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        /* output is row-major: entry (i,j) of the requested block */
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/* Set the leading dimension of the local dense storage. If the inner
   sequential matrix does not exist yet, create it first (choosing the CUDA/HIP
   sequential type when A itself is a device type). */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* the local matrix holds all N columns for the locally owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
/* Extract the submatrix A(isrow, iscol) as a new (or reused) MPI dense matrix.
   iscol is gathered onto every rank (dense storage holds all columns locally),
   while isrow must select rows this rank already owns: the result keeps the
   same row partitioning as A. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every rank needs the full column selection since it stores all columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* the destination is filled contiguously (column-major) as bv advances;
     source column icol[i] starts at offset lda*icol[i], and row irow[j] of A
     is local row irow[j] - rstart */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
/* Begin assembly: start scattering stashed off-process entries to their
   owning ranks. The matching receives are drained in MatAssemblyEnd_MPIDense. */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* nothing was stashed, so no communication is required */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
390 
/* Finish assembly: receive the stashed entries sent by other ranks, insert
   them locally, then assemble the inner sequential matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
/* Zero the given (global) rows, optionally placing `diag` on their diagonal
   and fixing the right-hand side b so that b_i = diag * x_i for zeroed rows. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the local rows first (with zero diagonal) ... */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  /* ... then insert the requested diagonal value via MatDiagonalSet */
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
465 
/* y = A x: broadcast the full input vector into the local work vector lvec via
   the PetscSF communication graph, then multiply with the local matrix. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build lvec and the SF scatter the first time a multiply is done */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* gather all N entries of x into lvec (memtype-aware for GPU vectors) */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
484 
/* z = A x + y: same gather as MatMult_MPIDense, then the local multadd. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build lvec and the SF scatter the first time a multiply is done */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* gather all N entries of x into lvec (memtype-aware for GPU vectors) */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
503 
/* y = A^T x (or A^H x when herm): apply the local (Hermitian) transpose into
   lvec, then sum-reduce the partial results into y across ranks via the SF. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build lvec and the SF scatter the first time a multiply is done */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* y is overwritten, so clear it before the reduction accumulates into it */
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  /* each rank contributes a partial result for every entry of y; sum them */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
524 
/* z = A^T x + y (or A^H x + y when herm): seed z with y, then sum-reduce the
   local transpose products into z across ranks via the SF. */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build lvec and the SF scatter the first time a multiply is done */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* start from y so the reduction adds A^T x on top of it */
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  /* each rank contributes a partial result for every entry of z; sum them */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
545 
/* y = A^T x: thin dispatch into the shared transpose kernel (plain transpose). */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
552 
/* z = A^T x + y: thin dispatch into the shared kernel (plain transpose). */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
559 
/* y = A^H x: thin dispatch into the shared kernel (Hermitian transpose). */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
566 
/* z = A^H x + y: thin dispatch into the shared kernel (Hermitian transpose). */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
573 
574 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
575 {
576   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
577   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
578   PetscScalar       *x;
579   const PetscScalar *av;
580 
581   PetscFunctionBegin;
582   PetscCall(VecGetArray(v, &x));
583   PetscCall(VecGetSize(v, &ng));
584   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
585   PetscCall(VecGetLocalSize(v, &nl));
586   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
587   radd = A->rmap->rstart * m;
588   PetscCall(MatDenseGetArrayRead(a->A, &av));
589   PetscCall(MatDenseGetLDA(a->A, &lda));
590   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
591   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
592   PetscCall(PetscArrayzero(x + i, nl - i));
593   PetscCall(VecRestoreArray(v, &x));
594   PetscFunctionReturn(PETSC_SUCCESS);
595 }
596 
/* Destroy the MPI dense matrix: release the inner sequential matrix, the
   communication objects and cached views, then detach every composed method
   so the object can be retyped or freed cleanly. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* outstanding column-vector/submatrix views would dangle after this point */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach all composed type-specific methods (composing NULL removes them) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA-specific conversions, products, and device array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP-specific conversions, products, and device array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
691 
692 #include <petscdraw.h>
/* Viewer backend for MATMPIDENSE covering ASCII, draw, and socket viewers.

   For ASCII INFO/INFO_DETAIL formats only per-rank summaries are printed and the
   routine returns early; otherwise the whole matrix is gathered onto rank 0 and
   viewed there through the sequential dense viewer.  Collective on the matrix's
   communicator. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* one synchronized summary line per rank; no matrix entries are printed */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      /* also show the communication pattern used by MatMult, if it has been set up */
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* a null draw context means nothing should be rendered */
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* each rank pushes its owned rows into A; they all land on rank 0 */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* the sub-viewer call is collective; only rank 0 actually views its (full) local block */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
773 
774 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
775 {
776   PetscBool iascii, isbinary, isdraw, issocket;
777 
778   PetscFunctionBegin;
779   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
780   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
781   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
782   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
783 
784   if (iascii || issocket || isdraw) {
785     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
786   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
790 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
791 {
792   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
793   Mat            mdn = mat->A;
794   PetscLogDouble isend[5], irecv[5];
795 
796   PetscFunctionBegin;
797   info->block_size = 1.0;
798 
799   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
800 
801   isend[0] = info->nz_used;
802   isend[1] = info->nz_allocated;
803   isend[2] = info->nz_unneeded;
804   isend[3] = info->memory;
805   isend[4] = info->mallocs;
806   if (flag == MAT_LOCAL) {
807     info->nz_used      = isend[0];
808     info->nz_allocated = isend[1];
809     info->nz_unneeded  = isend[2];
810     info->memory       = isend[3];
811     info->mallocs      = isend[4];
812   } else if (flag == MAT_GLOBAL_MAX) {
813     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
814 
815     info->nz_used      = irecv[0];
816     info->nz_allocated = irecv[1];
817     info->nz_unneeded  = irecv[2];
818     info->memory       = irecv[3];
819     info->mallocs      = irecv[4];
820   } else if (flag == MAT_GLOBAL_SUM) {
821     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
822 
823     info->nz_used      = irecv[0];
824     info->nz_allocated = irecv[1];
825     info->nz_unneeded  = irecv[2];
826     info->memory       = irecv[3];
827     info->mallocs      = irecv[4];
828   }
829   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
830   info->fill_ratio_needed = 0;
831   info->factor_mallocs    = 0;
832   PetscFunctionReturn(PETSC_SUCCESS);
833 }
834 
/* MatSetOption() implementation for MATMPIDENSE.

   Options fall into four groups: forwarded to the local dense block, recorded
   on this object (row orientation, stash behavior), explicitly ignored, or
   rejected as unknown.  The case labels deliberately fall through within each
   group. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* forwarded verbatim to the local sequential dense block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* recorded here (affects MatSetValues interpretation) and forwarded */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* meaningless for a dense matrix: every entry is stored anyway */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* skip communicating off-process values during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
878 
/* MatDiagonalScale() implementation for MATMPIDENSE: A <- diag(ll) A diag(rr).

   The local block is column-major with leading dimension lda, so row scaling
   strides by lda and column scaling walks columns contiguously.  Left scaling
   needs only the locally owned part of ll; right scaling must first gather the
   full set of column scale factors into lvec via the star-forest broadcast,
   since every rank stores all N columns of its rows. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* scale row i by l[i]: entries of row i are lda apart in column-major storage */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* gather the distributed rr into the sequential lvec (all N entries) */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* scale column i by r[i]: column i starts at offset i*lda and is contiguous */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
927 
928 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
929 {
930   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
931   PetscInt           i, j;
932   PetscMPIInt        size;
933   PetscReal          sum = 0.0;
934   const PetscScalar *av, *v;
935 
936   PetscFunctionBegin;
937   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
938   v = av;
939   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
940   if (size == 1) {
941     PetscCall(MatNorm(mdn->A, type, nrm));
942   } else {
943     if (type == NORM_FROBENIUS) {
944       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
945         sum += PetscRealPart(PetscConj(*v) * (*v));
946         v++;
947       }
948       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
949       *nrm = PetscSqrtReal(*nrm);
950       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
951     } else if (type == NORM_1) {
952       PetscReal *tmp, *tmp2;
953       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
954       *nrm = 0.0;
955       v    = av;
956       for (j = 0; j < mdn->A->cmap->n; j++) {
957         for (i = 0; i < mdn->A->rmap->n; i++) {
958           tmp[j] += PetscAbsScalar(*v);
959           v++;
960         }
961       }
962       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
963       for (j = 0; j < A->cmap->N; j++) {
964         if (tmp2[j] > *nrm) *nrm = tmp2[j];
965       }
966       PetscCall(PetscFree2(tmp, tmp2));
967       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
968     } else if (type == NORM_INFINITY) { /* max row norm */
969       PetscReal ntemp;
970       PetscCall(MatNorm(mdn->A, type, &ntemp));
971       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
972     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
973   }
974   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
975   PetscFunctionReturn(PETSC_SUCCESS);
976 }
977 
/* MatTranspose() implementation for MATMPIDENSE.

   Builds B = A^T by inserting each local column of A as a (global) row of B
   via MatSetValues(), letting assembly route entries to their owning ranks.
   For MAT_INPLACE_MATRIX the freshly built transpose replaces A's internals
   through MatHeaderMerge(). */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B has A's row/column layouts swapped */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices of the locally owned rows of A */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* column j of the local block (which spans all N global columns) becomes
     global row j of B; columns are lda apart in the column-major storage */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: swap B's guts into A and destroy the temporary header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1016 
1017 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1018 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1019 
1020 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1021 {
1022   PetscFunctionBegin;
1023   PetscCall(PetscLayoutSetUp(A->rmap));
1024   PetscCall(PetscLayoutSetUp(A->cmap));
1025   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1026   PetscFunctionReturn(PETSC_SUCCESS);
1027 }
1028 
1029 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1030 {
1031   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1032 
1033   PetscFunctionBegin;
1034   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1035   PetscFunctionReturn(PETSC_SUCCESS);
1036 }
1037 
1038 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1039 {
1040   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1041 
1042   PetscFunctionBegin;
1043   PetscCall(MatConjugate(a->A));
1044   PetscFunctionReturn(PETSC_SUCCESS);
1045 }
1046 
1047 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1048 {
1049   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1050 
1051   PetscFunctionBegin;
1052   PetscCall(MatRealPart(a->A));
1053   PetscFunctionReturn(PETSC_SUCCESS);
1054 }
1055 
1056 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1057 {
1058   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(MatImaginaryPart(a->A));
1062   PetscFunctionReturn(PETSC_SUCCESS);
1063 }
1064 
1065 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1066 {
1067   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1068 
1069   PetscFunctionBegin;
1070   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1071   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1072   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1073   PetscFunctionReturn(PETSC_SUCCESS);
1074 }
1075 
1076 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1077 
/* MatGetColumnReductions() implementation for MATMPIDENSE.

   Computes a per-column reduction (norm, sum, or mean) over the distributed
   matrix.  Note `type` mixes values from two enums: NormType (NORM_1, NORM_2,
   NORM_INFINITY) and MatReductionType (REDUCTION_SUM_*, REDUCTION_MEAN_*).
   Strategy: compute the local per-column contribution, Allreduce it (MAX for
   the infinity norm, SUM otherwise), then post-process: sqrt for NORM_2
   (locally squared first so the sums combine), divide by the global row count
   for the means. */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  /* means are computed as sums here and divided by m after the reduction */
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  /* square local 2-norms so summing across ranks yields the squared global norm */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1110 
/* MatSetRandom() implementation for MATMPIDENSE: each rank fills its local
   block independently from rctx. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  /* the inner matrix may have written on host or device; mirror its mask */
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1122 
/* MatMissingDiagonal() implementation for MATMPIDENSE: a dense matrix stores
   every entry explicitly, so no diagonal entry can be structurally missing.
   `d` (first missing row) is deliberately left unset since *missing is false. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1129 
1130 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1131 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1132 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1133 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1134 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1135 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1136 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1137 
/* Operation dispatch table for MATMPIDENSE.  Each slot corresponds to a field
   of struct _MatOps (the numeric comments give the slot index); NULL marks
   operations this type does not implement.  Keep positions in sync with the
   _MatOps declaration when editing. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL};
1291 
/* Preallocation backend for MatMPIDenseSetPreallocation().

   Creates (if needed) the inner sequential dense block of size
   rmap->n x cmap->N — each rank stores all global columns of its owned rows —
   and hands `data` (optional user-provided storage, may be NULL) to the
   sequential preallocation.  If the outer matrix is a CUDA/HIP variant, the
   inner block gets the matching device sequential type. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block: owned rows by ALL global columns */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* inherit where the freshly allocated data lives (host vs device) */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1324 
/* Convert a MATMPIAIJ matrix to MATMPIDENSE.

   Each rank merges its diagonal and off-diagonal AIJ parts into a single
   sequential AIJ (C), densifies it (B), and the dense local blocks are then
   concatenated into a parallel dense matrix.  Handles all three MatReuse
   modes; MAT_INPLACE_MATRIX replaces A's internals via MatHeaderReplace(). */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  /* C now names the output matrix: the existing one for reuse, else created below */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1343 
/* Convert a MATMPIDENSE (or MATSEQDENSE) matrix to MPIAIJ.

   Sparsifies the local dense block into a sequential AIJ matrix (B), then
   concatenates the per-rank blocks into a parallel AIJ matrix.  Mirrors the
   reuse handling of MatConvert_MPIAIJ_MPIDense() above. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  /* C now names the output matrix: the existing one for reuse, else created below */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1361 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL.

   Pushes each rank's owned rows (all N columns) into the Elemental matrix via
   MatSetValues with ADD_VALUES — the PETSc-Elemental interface only supports
   additive insertion — after zeroing the target when reusing it.  A dense
   block whose leading dimension equals its row count can be inserted in one
   call; otherwise insertion goes column by column to skip the lda padding. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* ADD_VALUES below requires starting from zero */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* column-oriented to match the dense storage order */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global indices: all N columns, and the locally owned rows */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1406 
1407 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1408 {
1409   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1410 
1411   PetscFunctionBegin;
1412   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1413   PetscFunctionReturn(PETSC_SUCCESS);
1414 }
1415 
1416 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1417 {
1418   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1419 
1420   PetscFunctionBegin;
1421   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1422   PetscFunctionReturn(PETSC_SUCCESS);
1423 }
1424 
1425 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1426 {
1427   Mat_MPIDense *mat;
1428   PetscInt      m, nloc, N;
1429 
1430   PetscFunctionBegin;
1431   PetscCall(MatGetSize(inmat, &m, &N));
1432   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1433   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1434     PetscInt sum;
1435 
1436     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1437     /* Check sum(n) = N */
1438     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1439     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1440 
1441     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1442     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1443   }
1444 
1445   /* numeric phase */
1446   mat = (Mat_MPIDense *)(*outmat)->data;
1447   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1448   PetscFunctionReturn(PETSC_SUCCESS);
1449 }
1450 
1451 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1452 {
1453   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1454   PetscInt      lda;
1455 
1456   PetscFunctionBegin;
1457   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1458   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1459   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1460   a->vecinuse = col + 1;
1461   PetscCall(MatDenseGetLDA(a->A, &lda));
1462   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1463   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1464   *v = a->cvec;
1465   PetscFunctionReturn(PETSC_SUCCESS);
1466 }
1467 
1468 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1469 {
1470   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1471 
1472   PetscFunctionBegin;
1473   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1474   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1475   a->vecinuse = 0;
1476   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1477   PetscCall(VecResetArray(a->cvec));
1478   if (v) *v = NULL;
1479   PetscFunctionReturn(PETSC_SUCCESS);
1480 }
1481 
1482 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1483 {
1484   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1485   PetscInt      lda;
1486 
1487   PetscFunctionBegin;
1488   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1489   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1490   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1491   a->vecinuse = col + 1;
1492   PetscCall(MatDenseGetLDA(a->A, &lda));
1493   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1494   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1495   PetscCall(VecLockReadPush(a->cvec));
1496   *v = a->cvec;
1497   PetscFunctionReturn(PETSC_SUCCESS);
1498 }
1499 
1500 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1501 {
1502   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1503 
1504   PetscFunctionBegin;
1505   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1506   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1507   a->vecinuse = 0;
1508   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1509   PetscCall(VecLockReadPop(a->cvec));
1510   PetscCall(VecResetArray(a->cvec));
1511   if (v) *v = NULL;
1512   PetscFunctionReturn(PETSC_SUCCESS);
1513 }
1514 
1515 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1516 {
1517   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1518   PetscInt      lda;
1519 
1520   PetscFunctionBegin;
1521   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1522   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1523   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1524   a->vecinuse = col + 1;
1525   PetscCall(MatDenseGetLDA(a->A, &lda));
1526   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1527   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1528   *v = a->cvec;
1529   PetscFunctionReturn(PETSC_SUCCESS);
1530 }
1531 
1532 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1533 {
1534   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1535 
1536   PetscFunctionBegin;
1537   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1538   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1539   a->vecinuse = 0;
1540   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1541   PetscCall(VecResetArray(a->cvec));
1542   if (v) *v = NULL;
1543   PetscFunctionReturn(PETSC_SUCCESS);
1544 }
1545 
/* Return (in *v) a cached parallel dense matrix whose local block aliases the
   [rbegin,rend) x [cbegin,cend) submatrix of A. Only one such view may be
   outstanding at a time; it must be returned with MatDenseRestoreSubMatrix(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the requested global row range to the rows owned by this process */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached view matrix with matching layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached matrix exists: rebuild its layouts only when the requested sizes
       changed; the row-layout check is collective so all ranks agree */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the local block of the view aliases the local block of A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored 1-based so 0 can mean "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1603 
1604 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1605 {
1606   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1607   Mat_MPIDense *c;
1608 
1609   PetscFunctionBegin;
1610   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1611   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1612   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1613   a->matinuse = 0;
1614   c           = (Mat_MPIDense *)a->cmat->data;
1615   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1616   if (v) *v = NULL;
1617 #if defined(PETSC_HAVE_DEVICE)
1618   A->offloadmask = a->A->offloadmask;
1619 #endif
1620   PetscFunctionReturn(PETSC_SUCCESS);
1621 }
1622 
1623 /*MC
1624    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1625 
1626    Options Database Key:
1627 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1628 
1629   Level: beginner
1630 
1631 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1632 M*/
/* Constructor for MATMPIDENSE: allocates the implementation data, installs the
   function table, and composes all type-specific method implementations. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MPIDENSE function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions (optional backends are guarded by configure-time macros) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  /* preallocation and matrix products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1701 
1702 /*MC
1703    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1704 
1705    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1706    and `MATMPIDENSE` otherwise.
1707 
1708    Options Database Key:
1709 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1710 
1711   Level: beginner
1712 
1713 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1714 M*/
1715 
/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data.  Set to `NULL` for PETSc
   to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op for
     matrix types that do not provide MatMPIDenseSetPreallocation_C */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1745 
/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the matrix type does not compose this method */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the placed array is host memory, so mark the CPU copy as current */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1777 
/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the matrix type does not compose this method */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1801 
/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod errors if the matrix type does not compose this method */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array is host memory, so mark the CPU copy as current */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1832 
/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation routines are invoked; each dispatches through a
     composed method, so only the one matching the actual type takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1878 
1879 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1880 {
1881   Mat           mat;
1882   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1883 
1884   PetscFunctionBegin;
1885   *newmat = NULL;
1886   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1887   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1888   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1889   a = (Mat_MPIDense *)mat->data;
1890 
1891   mat->factortype   = A->factortype;
1892   mat->assembled    = PETSC_TRUE;
1893   mat->preallocated = PETSC_TRUE;
1894 
1895   mat->insertmode = NOT_SET_VALUES;
1896   a->donotstash   = oldmat->donotstash;
1897 
1898   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1899   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1900 
1901   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1902 
1903   *newmat = mat;
1904   PetscFunctionReturn(PETSC_SUCCESS);
1905 }
1906 
/* Load a parallel dense matrix from a binary viewer or, when PETSc is built
   with HDF5 support, from an HDF5 viewer. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1932 
1933 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1934 {
1935   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1936   Mat           a, b;
1937 
1938   PetscFunctionBegin;
1939   a = matA->A;
1940   b = matB->A;
1941   PetscCall(MatEqual(a, b, flag));
1942   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1943   PetscFunctionReturn(PETSC_SUCCESS);
1944 }
1945 
1946 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1947 {
1948   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1949 
1950   PetscFunctionBegin;
1951   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1952   PetscCall(MatDestroy(&atb->atb));
1953   PetscCall(PetscFree(atb));
1954   PetscFunctionReturn(PETSC_SUCCESS);
1955 }
1956 
1957 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1958 {
1959   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1960 
1961   PetscFunctionBegin;
1962   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1963   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1964   PetscCall(PetscFree(abt));
1965   PetscFunctionReturn(PETSC_SUCCESS);
1966 }
1967 
/* Numeric phase of C = A^T * B for parallel dense matrices: each rank forms
   the full local product aseq^T * bseq, then the row-distributed result is
   summed onto the owners of C's rows with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers and counts were allocated in the symbolic phase */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, pack that
     rank's owned rows of every column contiguously (column-major per chunk) */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  /* all entries were written locally, so assembly needs no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2013 
/* Symbolic phase of C = A^T * B for parallel dense matrices: size and type C,
   and allocate the send buffer / receive counts used by the reduce-scatter in
   the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  /* NOTE(review): when both CUDA and HIP are enabled this overwrites the
     result of the CUDA check above -- confirm that is intended */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the entire cM x cN local product, repacked per destination */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* rank i receives its owned rows of C for all cN columns */
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2055 
/* Symbolic phase of C = A * B^T for parallel dense matrices: pick the
   communication algorithm ("allgatherv" or "cyclic"), size C, and allocate
   the exchange buffers reused by the numeric phase. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* dedicated tag for the point-to-point messages of the cyclic algorithm */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two buffers sized for the largest row block of B, passed rank to rank */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0]: packed local rows of B; buf[1]: all rows of B after the gather */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2107 
/*
  MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic - numeric phase of C = A*B^T using a ring ("cyclic") exchange.

  Each rank packs its local rows of B contiguously (column-major with leading dimension bn) and
  passes that block around the ring. At every step the rank multiplies its local part of A against
  the transposed block it currently holds, filling the column block of the local C that corresponds
  to the originating rank's rows of B. Uses the two ping-pong buffers allocated in the symbolic phase.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense storage, cast for the BLAS call */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is already contiguous: send it in place (cast drops const; the buffer is only read) */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact local B into buf[0], column by column, so the wire format has leading dimension bn */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring schedule: send to the previous rank, receive from the next one */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the block currently in sendbuf originated on this rank */
  for (i = 0; i < size; i++) {
    /* the previous round's communication has completed, so both buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate ping-pong buffers each round */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T: fills the column block of C owned by recvisfrom's rows of B */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* all entries were computed locally, so assembly involves no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2191 
2192 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2193 {
2194   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2195   Mat_MatTransMultDense *abt;
2196   MPI_Comm               comm;
2197   PetscMPIInt            size;
2198   PetscScalar           *cv, *sendbuf, *recvbuf;
2199   const PetscScalar     *av, *bv;
2200   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2201   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2202   PetscBLASInt           cm, cn, ck, alda, clda;
2203 
2204   PetscFunctionBegin;
2205   MatCheckProduct(C, 3);
2206   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2207   abt = (Mat_MatTransMultDense *)C->product->data;
2208   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2209   PetscCallMPI(MPI_Comm_size(comm, &size));
2210   PetscCall(MatDenseGetArrayRead(a->A, &av));
2211   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2212   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2213   PetscCall(MatDenseGetLDA(a->A, &i));
2214   PetscCall(PetscBLASIntCast(i, &alda));
2215   PetscCall(MatDenseGetLDA(b->A, &blda));
2216   PetscCall(MatDenseGetLDA(c->A, &i));
2217   PetscCall(PetscBLASIntCast(i, &clda));
2218   /* copy transpose of B into buf[0] */
2219   bn      = B->rmap->n;
2220   sendbuf = abt->buf[0];
2221   recvbuf = abt->buf[1];
2222   for (k = 0, j = 0; j < bn; j++) {
2223     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2224   }
2225   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2226   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2227   PetscCall(PetscBLASIntCast(cK, &ck));
2228   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2229   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2230   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2231   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2232   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2233   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2234   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2235   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2236   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2237   PetscFunctionReturn(PETSC_SUCCESS);
2238 }
2239 
2240 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2241 {
2242   Mat_MatTransMultDense *abt;
2243 
2244   PetscFunctionBegin;
2245   MatCheckProduct(C, 3);
2246   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2247   abt = (Mat_MatTransMultDense *)C->product->data;
2248   switch (abt->alg) {
2249   case 1:
2250     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2251     break;
2252   default:
2253     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2254     break;
2255   }
2256   PetscFunctionReturn(PETSC_SUCCESS);
2257 }
2258 
2259 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2260 {
2261   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2262 
2263   PetscFunctionBegin;
2264   PetscCall(MatDestroy(&ab->Ce));
2265   PetscCall(MatDestroy(&ab->Ae));
2266   PetscCall(MatDestroy(&ab->Be));
2267   PetscCall(PetscFree(ab));
2268   PetscFunctionReturn(PETSC_SUCCESS);
2269 }
2270 
/*
  MatMatMultNumeric_MPIDense_MPIDense - numeric phase of C = A*B for MPI dense matrices.

  Two code paths, chosen at symbolic time (see MatMatMultSymbolic_MPIDense_MPIDense):
    - Elemental (ab->Ae and ab->Ce were created): convert A and B to Elemental form, multiply
      there, and convert the result back into C;
    - "petsc" (default): gather the rows of B needed by this rank into the sequential matrix
      ab->Be via the column scatter (Mvctx star forest), then run a purely local dense product.
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* refresh the Elemental copies of A and B, multiply, and copy the result back into C */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* broadcast B column by column: read + i*lda is column i of the local block of B,
       write + i*Be->rmap->n is the corresponding gathered column of Be */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* local product: C_local = A_local * Be (no further communication required) */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2308 
2309 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2310 {
2311   Mat_Product      *product = C->product;
2312   PetscInt          alg;
2313   Mat_MatMultDense *ab;
2314   PetscBool         flg;
2315 
2316   PetscFunctionBegin;
2317   MatCheckProduct(C, 4);
2318   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2319   /* check local size of A and B */
2320   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2321              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2322 
2323   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2324   alg = flg ? 0 : 1;
2325 
2326   /* setup C */
2327   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2328   PetscCall(MatSetType(C, MATMPIDENSE));
2329   PetscCall(MatSetUp(C));
2330 
2331   /* create data structure for reuse Cdense */
2332   PetscCall(PetscNew(&ab));
2333 
2334   switch (alg) {
2335   case 1: /* alg: "elemental" */
2336 #if PetscDefined(HAVE_ELEMENTAL)
2337     /* create elemental matrices Ae and Be */
2338     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2339     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2340     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2341     PetscCall(MatSetUp(ab->Ae));
2342     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2343 
2344     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2345     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2346     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2347     PetscCall(MatSetUp(ab->Be));
2348     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2349 
2350     /* compute symbolic Ce = Ae*Be */
2351     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2352     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2353 #else
2354     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2355 #endif
2356     break;
2357   default: /* alg: "petsc" */
2358     ab->Ae = NULL;
2359     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2360     ab->Ce = NULL;
2361     break;
2362   }
2363 
2364   C->product->data       = ab;
2365   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2366   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2367   PetscFunctionReturn(PETSC_SUCCESS);
2368 }
2369 
2370 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2371 {
2372   Mat_Product *product     = C->product;
2373   const char  *algTypes[2] = {"petsc", "elemental"};
2374   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2375   PetscBool    flg = PETSC_FALSE;
2376 
2377   PetscFunctionBegin;
2378   /* Set default algorithm */
2379   alg = 0; /* default is petsc */
2380   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2381   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2382 
2383   /* Get runtime option */
2384   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2385   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2386   PetscOptionsEnd();
2387   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2388 
2389   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2390   C->ops->productsymbolic = MatProductSymbolic_AB;
2391   PetscFunctionReturn(PETSC_SUCCESS);
2392 }
2393 
2394 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2395 {
2396   Mat_Product *product = C->product;
2397   Mat          A = product->A, B = product->B;
2398 
2399   PetscFunctionBegin;
2400   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2401              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2402   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2403   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2404   PetscFunctionReturn(PETSC_SUCCESS);
2405 }
2406 
2407 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2408 {
2409   Mat_Product *product     = C->product;
2410   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2411   PetscInt     alg, nalg = 2;
2412   PetscBool    flg = PETSC_FALSE;
2413 
2414   PetscFunctionBegin;
2415   /* Set default algorithm */
2416   alg = 0; /* default is allgatherv */
2417   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2418   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2419 
2420   /* Get runtime option */
2421   if (product->api_user) {
2422     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2423     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2424     PetscOptionsEnd();
2425   } else {
2426     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2427     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2428     PetscOptionsEnd();
2429   }
2430   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2431 
2432   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2433   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2434   PetscFunctionReturn(PETSC_SUCCESS);
2435 }
2436 
2437 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2438 {
2439   Mat_Product *product = C->product;
2440 
2441   PetscFunctionBegin;
2442   switch (product->type) {
2443   case MATPRODUCT_AB:
2444     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2445     break;
2446   case MATPRODUCT_AtB:
2447     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2448     break;
2449   case MATPRODUCT_ABt:
2450     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2451     break;
2452   default:
2453     break;
2454   }
2455   PetscFunctionReturn(PETSC_SUCCESS);
2456 }
2457