xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4551d0b5c8b5c94e77cfefe703d47ffe2ab8c839)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data; /* only valid to dereference when A is MATMPIDENSE (checked below) */
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* the per-process sequential dense block */
  else {
    /* not MPI dense: a sequential dense matrix is its own local matrix; any other type is unsupported */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
/* Return the square on-process diagonal block of A as a sequential dense matrix.

   The block is created lazily on first call, cached on A under the key
   "DiagonalBlock", and shares storage with A's local matrix (no copy). */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases A's storage starting at column rstart; NOTE(review): the offset
       m*rstart assumes the local matrix has lda == m -- confirm for custom LDA */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing on A keeps B alive and cached; drop this function's reference */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
129 
/* Insert or add a logically dense block of values into the parallel matrix.

   Locally owned rows go straight into the sequential block; off-process rows
   are stashed (unless donotstash) and delivered during assembly.  v is read
   row- or column-oriented according to the roworiented flag. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row index means "skip this row" */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row in the sequential block */
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: record it for communication during assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
162 
/* Read a block of values; only locally owned rows are supported (no communication).
   Output v is row-oriented: entry (i,j) is written to v[i*n + j]. */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row in the sequential block */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/* Set the leading dimension of the local dense storage.

   If the inner sequential matrix does not exist yet it is created here (with a
   CUDA/HIP sequential type when A is the corresponding MPI device type) so the
   LDA can be recorded before preallocation happens. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE; /* default inner type unless a device flavor is detected */

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block is (local rows) x (global columns) */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
/* Extract the submatrix A(isrow, iscol) into *B.

   The column IS is gathered onto every process (columns are stored fully on
   each rank), while rows must follow the same partitioning as A -- no parallel
   redistribution is performed.  Values are copied one (global) column at a
   time from A's local block into the new matrix's local block. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local)); /* every rank needs all selected column indices */
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* bv advances contiguously; NOTE(review): this assumes the new local block
     has lda == nrows (true right after MatMPIDenseSetPreallocation) */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
/* Finish assembly: drain incoming stash messages, inserting received values,
   then assemble the local sequential block. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      /* entries in a message are grouped by row; insert each group in one call */
      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
/* Zero the listed global rows of A, optionally placing diag on the diagonal and
   fixing the right-hand side so that b = diag*x on the zeroed rows. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block; the diagonal is handled below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with INSERT_VALUES writes diag on the ENTIRE
       diagonal, not only on the zeroed rows -- confirm this is intended */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
465 
466 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
467 {
468   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
469   const PetscScalar *ax;
470   PetscScalar       *ay;
471   PetscMemType       axmtype, aymtype;
472 
473   PetscFunctionBegin;
474   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
475   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
476   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
477   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
478   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
479   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
480   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
481   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
482   PetscFunctionReturn(PETSC_SUCCESS);
483 }
484 
485 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
486 {
487   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
488   const PetscScalar *ax;
489   PetscScalar       *ay;
490   PetscMemType       axmtype, aymtype;
491 
492   PetscFunctionBegin;
493   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
494   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
495   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
496   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
497   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
498   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
499   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
500   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
501   PetscFunctionReturn(PETSC_SUCCESS);
502 }
503 
504 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
505 {
506   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
507   const PetscScalar *ax;
508   PetscScalar       *ay;
509   PetscMemType       axmtype, aymtype;
510 
511   PetscFunctionBegin;
512   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
513   PetscCall(VecSet(yy, 0.0));
514   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
515   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
516   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
517   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
518   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
519   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
520   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
521   PetscFunctionReturn(PETSC_SUCCESS);
522 }
523 
524 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
525 {
526   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
527   const PetscScalar *ax;
528   PetscScalar       *ay;
529   PetscMemType       axmtype, aymtype;
530 
531   PetscFunctionBegin;
532   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
533   PetscCall(VecCopy(yy, zz));
534   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
535   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
536   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
537   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
538   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
539   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
540   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
541   PetscFunctionReturn(PETSC_SUCCESS);
542 }
543 
544 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
545 {
546   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
547   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
548   PetscScalar       *x;
549   const PetscScalar *av;
550 
551   PetscFunctionBegin;
552   PetscCall(VecGetArray(v, &x));
553   PetscCall(VecGetSize(v, &ng));
554   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
555   PetscCall(VecGetLocalSize(v, &nl));
556   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
557   radd = A->rmap->rstart * m;
558   PetscCall(MatDenseGetArrayRead(a->A, &av));
559   PetscCall(MatDenseGetLDA(a->A, &lda));
560   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
561   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
562   PetscCall(PetscArrayzero(x + i, nl - i));
563   PetscCall(VecRestoreArray(v, &x));
564   PetscFunctionReturn(PETSC_SUCCESS);
565 }
566 
/* Destroy the parallel dense matrix: release the inner objects, free the data
   structure, and unregister every composed method and the cached diagonal block. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying while a column vector / submatrix is checked out is a usage error */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* unregister all type-specific methods composed on this object */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
661 
662 #include <petscdraw.h>
663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
664 {
665   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
666   PetscMPIInt       rank;
667   PetscViewerType   vtype;
668   PetscBool         iascii, isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
674   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
675   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
676   if (iascii) {
677     PetscCall(PetscViewerGetType(viewer, &vtype));
678     PetscCall(PetscViewerGetFormat(viewer, &format));
679     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
680       MatInfo info;
681       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
682       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
683       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
684                                                    (PetscInt)info.memory));
685       PetscCall(PetscViewerFlush(viewer));
686       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
687       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
688       PetscFunctionReturn(PETSC_SUCCESS);
689     } else if (format == PETSC_VIEWER_ASCII_INFO) {
690       PetscFunctionReturn(PETSC_SUCCESS);
691     }
692   } else if (isdraw) {
693     PetscDraw draw;
694     PetscBool isnull;
695 
696     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
697     PetscCall(PetscDrawIsNull(draw, &isnull));
698     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
699   }
700 
701   {
702     /* assemble the entire matrix onto first processor. */
703     Mat          A;
704     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
705     PetscInt    *cols;
706     PetscScalar *vals;
707 
708     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
709     if (rank == 0) {
710       PetscCall(MatSetSizes(A, M, N, M, N));
711     } else {
712       PetscCall(MatSetSizes(A, 0, 0, M, N));
713     }
714     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
715     PetscCall(MatSetType(A, MATMPIDENSE));
716     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
717 
718     /* Copy the matrix ... This isn't the most efficient means,
719        but it's quick for now */
720     A->insertmode = INSERT_VALUES;
721 
722     row = mat->rmap->rstart;
723     m   = mdn->A->rmap->n;
724     for (i = 0; i < m; i++) {
725       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
726       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
727       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
728       row++;
729     }
730 
731     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
732     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
733     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
734     if (rank == 0) {
735       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
736       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
737     }
738     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
739     PetscCall(MatDestroy(&A));
740   }
741   PetscFunctionReturn(PETSC_SUCCESS);
742 }
743 
744 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
745 {
746   PetscBool iascii, isbinary, isdraw, issocket;
747 
748   PetscFunctionBegin;
749   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
750   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
751   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
752   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
753 
754   if (iascii || issocket || isdraw) {
755     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
756   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
761 {
762   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
763   Mat            mdn = mat->A;
764   PetscLogDouble isend[5], irecv[5];
765 
766   PetscFunctionBegin;
767   info->block_size = 1.0;
768 
769   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
770 
771   isend[0] = info->nz_used;
772   isend[1] = info->nz_allocated;
773   isend[2] = info->nz_unneeded;
774   isend[3] = info->memory;
775   isend[4] = info->mallocs;
776   if (flag == MAT_LOCAL) {
777     info->nz_used      = isend[0];
778     info->nz_allocated = isend[1];
779     info->nz_unneeded  = isend[2];
780     info->memory       = isend[3];
781     info->mallocs      = isend[4];
782   } else if (flag == MAT_GLOBAL_MAX) {
783     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
784 
785     info->nz_used      = irecv[0];
786     info->nz_allocated = irecv[1];
787     info->nz_unneeded  = irecv[2];
788     info->memory       = irecv[3];
789     info->mallocs      = irecv[4];
790   } else if (flag == MAT_GLOBAL_SUM) {
791     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   }
799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800   info->fill_ratio_needed = 0;
801   info->factor_mallocs    = 0;
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
/*
  Handles MatSetOption() for MATMPIDENSE.  Options that affect the local
  storage are forwarded to the inner sequential matrix; options with no
  meaning for a dense format are logged and ignored; anything unrecognized
  raises PETSC_ERR_SUP.
*/
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded verbatim to the local block */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remembered for MatSetValues() on this wrapper, and forwarded */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* meaningless for a dense matrix: every entry is stored */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stash communication of off-rank values during assembly */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
848 
/*
  In-place scaling A <- diag(ll) * A * diag(rr); either vector may be NULL
  to skip that side.

  Left scaling is purely local: row i of the local block is multiplied by
  ll[i].  For right scaling each rank needs all N column factors, since the
  local block stores every global column of its rows, so rr is first
  broadcast into the work vector mdn->lvec through the Mvctx star forest.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i: consecutive row entries are lda apart */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* build the scatter lazily */
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* gather the distributed rr into the local full-length vector r */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i of the local block is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
897 
898 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
899 {
900   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
901   PetscInt           i, j;
902   PetscMPIInt        size;
903   PetscReal          sum = 0.0;
904   const PetscScalar *av, *v;
905 
906   PetscFunctionBegin;
907   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
908   v = av;
909   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
910   if (size == 1) {
911     PetscCall(MatNorm(mdn->A, type, nrm));
912   } else {
913     if (type == NORM_FROBENIUS) {
914       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
915         sum += PetscRealPart(PetscConj(*v) * (*v));
916         v++;
917       }
918       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
919       *nrm = PetscSqrtReal(*nrm);
920       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
921     } else if (type == NORM_1) {
922       PetscReal *tmp, *tmp2;
923       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
924       *nrm = 0.0;
925       v    = av;
926       for (j = 0; j < mdn->A->cmap->n; j++) {
927         for (i = 0; i < mdn->A->rmap->n; i++) {
928           tmp[j] += PetscAbsScalar(*v);
929           v++;
930         }
931       }
932       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
933       for (j = 0; j < A->cmap->N; j++) {
934         if (tmp2[j] > *nrm) *nrm = tmp2[j];
935       }
936       PetscCall(PetscFree2(tmp, tmp2));
937       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
938     } else if (type == NORM_INFINITY) { /* max row norm */
939       PetscReal ntemp;
940       PetscCall(MatNorm(mdn->A, type, &ntemp));
941       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
942     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
943   }
944   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
945   PetscFunctionReturn(PETSC_SUCCESS);
946 }
947 
/*
  Forms the transpose of an MPI dense matrix.

  For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a fresh matrix B with the
  flipped parallel layout (A->cmap->n local rows, A->rmap->n local columns)
  is created; for MAT_REUSE_MATRIX the user-supplied *matout is refilled
  (after a nonzero-state consistency check).  Each column j of the local
  block (which holds all N global columns) is inserted as row j of B at the
  owned global row positions; stash-based assembly then routes the entries
  to their owning ranks.  MAT_INPLACE_MATRIX finishes by merging B's header
  into A.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n; /* locally owned rows of A */
  n = a->A->cmap->n; /* all global columns are stored locally */
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global indices of the owned rows */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda; /* advance to the next column of the local block */
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
986 
987 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
988 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
989 
990 static PetscErrorCode MatSetUp_MPIDense(Mat A)
991 {
992   PetscFunctionBegin;
993   PetscCall(PetscLayoutSetUp(A->rmap));
994   PetscCall(PetscLayoutSetUp(A->cmap));
995   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
996   PetscFunctionReturn(PETSC_SUCCESS);
997 }
998 
999 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1000 {
1001   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1002 
1003   PetscFunctionBegin;
1004   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1009 {
1010   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1011 
1012   PetscFunctionBegin;
1013   PetscCall(MatConjugate(a->A));
1014   PetscFunctionReturn(PETSC_SUCCESS);
1015 }
1016 
1017 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1018 {
1019   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1020 
1021   PetscFunctionBegin;
1022   PetscCall(MatRealPart(a->A));
1023   PetscFunctionReturn(PETSC_SUCCESS);
1024 }
1025 
1026 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1027 {
1028   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1029 
1030   PetscFunctionBegin;
1031   PetscCall(MatImaginaryPart(a->A));
1032   PetscFunctionReturn(PETSC_SUCCESS);
1033 }
1034 
1035 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1036 {
1037   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1038 
1039   PetscFunctionBegin;
1040   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1041   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1042   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1047 
/*
  Computes per-column reductions (norms, sums, or means) of A into
  `reductions` (length = global column count).

  Note `type` may hold either a NormType or a MatReductionType value.  The
  sequential kernel runs on the local block, which stores all N columns of
  the locally owned rows; mean reductions are obtained by requesting the
  corresponding sum and dividing by the global row count afterwards.  Local
  results are combined with MPI_SUM (MPI_MAX for the infinity norm); 2-norms
  are squared before and square-rooted after the reduction so they sum
  correctly.
*/
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n; /* global row and column counts */
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* square local norms so that summing across ranks is valid */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* turn global sums into means */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1080 
/* Fills the matrix with random values; entries are purely local, so this
   simply delegates to the inner sequential block. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  /* keep the wrapper's host/device offload state in sync with the inner matrix */
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1092 
/* A dense matrix stores every entry, so a diagonal entry can never be
   "missing"; d is deliberately left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1099 
1100 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1101 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1102 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1103 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1104 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1105 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1106 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1107 
/*
  Virtual function table for MATMPIDENSE.  The numeric comments give the slot
  index in struct _MatOps; NULL slots fall back to the generic Mat behavior
  (or are unsupported for this type).
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1260 
/*
  Preallocation backend for MATMPIDENSE: (re)creates the inner sequential
  dense matrix a->A of size rmap->n x cmap->N, i.e. each rank stores every
  global column of its locally owned rows.  `data` optionally supplies
  caller-owned storage, forwarded to MatSeqDenseSetPreallocation().  The
  inner type mirrors the outer one (CUDA/HIP variants when applicable).
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block: all owned rows by all global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* inherit the inner matrix's host/device offload state */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1293 
1294 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1295 {
1296   Mat B, C;
1297 
1298   PetscFunctionBegin;
1299   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1300   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1301   PetscCall(MatDestroy(&C));
1302   if (reuse == MAT_REUSE_MATRIX) {
1303     C = *newmat;
1304   } else C = NULL;
1305   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1306   PetscCall(MatDestroy(&B));
1307   if (reuse == MAT_INPLACE_MATRIX) {
1308     PetscCall(MatHeaderReplace(A, &C));
1309   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1310   PetscFunctionReturn(PETSC_SUCCESS);
1311 }
1312 
1313 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1314 {
1315   Mat B, C;
1316 
1317   PetscFunctionBegin;
1318   PetscCall(MatDenseGetLocalMatrix(A, &C));
1319   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1320   if (reuse == MAT_REUSE_MATRIX) {
1321     C = *newmat;
1322   } else C = NULL;
1323   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1324   PetscCall(MatDestroy(&B));
1325   if (reuse == MAT_INPLACE_MATRIX) {
1326     PetscCall(MatHeaderReplace(A, &C));
1327   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1328   PetscFunctionReturn(PETSC_SUCCESS);
1329 }
1330 
1331 #if defined(PETSC_HAVE_ELEMENTAL)
/*
  Converts a MATMPIDENSE matrix to Elemental's distribution.  Each rank
  inserts its locally owned rows (all N global columns) into the Elemental
  matrix; when the local leading dimension differs from the row count the
  data is inserted column by column instead of in one call.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global column indices 0..N-1 and the owned global row indices */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* padded leading dimension: insert one (contiguous) column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1374 #endif
1375 
1376 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1377 {
1378   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1379 
1380   PetscFunctionBegin;
1381   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1382   PetscFunctionReturn(PETSC_SUCCESS);
1383 }
1384 
1385 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1386 {
1387   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1388 
1389   PetscFunctionBegin;
1390   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
1394 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1395 {
1396   Mat_MPIDense *mat;
1397   PetscInt      m, nloc, N;
1398 
1399   PetscFunctionBegin;
1400   PetscCall(MatGetSize(inmat, &m, &N));
1401   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1402   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1403     PetscInt sum;
1404 
1405     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1406     /* Check sum(n) = N */
1407     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1408     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1409 
1410     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1411     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1412   }
1413 
1414   /* numeric phase */
1415   mat = (Mat_MPIDense *)(*outmat)->data;
1416   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1417   PetscFunctionReturn(PETSC_SUCCESS);
1418 }
1419 
/* Expose column `col` of A as a parallel Vec with read/write access.
   The Vec aliases the matrix storage (no copy); the caller must pair this with
   MatDenseRestoreColumnVec_MPIDense(). Only one column vector and one
   submatrix may be outstanding at a time, hence the vecinuse/matinuse guards. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the reusable column vector (array is placed below) */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the vector at column `col` of the (column-major) local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1436 
1437 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1438 {
1439   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1440 
1441   PetscFunctionBegin;
1442   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1443   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1444   a->vecinuse = 0;
1445   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1446   PetscCall(VecResetArray(a->cvec));
1447   if (v) *v = NULL;
1448   PetscFunctionReturn(PETSC_SUCCESS);
1449 }
1450 
/* Expose column `col` of A as a read-only parallel Vec (no copy).
   Same contract as MatDenseGetColumnVec_MPIDense() but the returned vector is
   read-locked and the matrix array is taken with read access only. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  /* point the vector at column `col` of the (column-major) local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec)); /* forbid writes through the vector */
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1468 
/* Return the read-only column Vec obtained with MatDenseGetColumnVecRead_MPIDense():
   restore the read array, pop the read lock, and detach the vector. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec)); /* detach the vector from the matrix storage */
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1483 
/* Expose column `col` of A as a write-only parallel Vec (no copy).
   Same contract as MatDenseGetColumnVec_MPIDense() but the matrix array is
   taken with write access, so previous contents need not be valid. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the vector at column `col` of the (column-major) local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1500 
/* Return the write-only column Vec obtained with MatDenseGetColumnVecWrite_MPIDense():
   restore the write array and detach the vector. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec)); /* detach the vector from the matrix storage */
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1514 
/* Expose the global row range [rbegin,rend) x column range [cbegin,cend) of A
   as a (cached) parallel dense submatrix that aliases A's storage.
   Must be paired with MatDenseRestoreSubMatrix_MPIDense(); only one submatrix
   (or column vector) may be outstanding at a time. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the requested global row range to this rank's ownership range,
     expressed as local indices into a->A */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached submatrix shell with the requested layout */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached shell exists: rebuild its layouts only if the requested shape changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* global size matches but layout was custom-built: all ranks must agree
         that the local split is unchanged before reusing it */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the local block of the submatrix aliases the local block of A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* +1 so that cbegin == 0 still reads as "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1572 
/* Return the submatrix obtained with MatDenseGetSubMatrix_MPIDense():
   release the aliased local block and clear the in-use flag.
   The cached shell a->cmat is kept for reuse; only c->A is restored. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the submatrix may have moved data between host and device */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1591 
1592 /*MC
1593    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1594 
1595    Options Database Key:
1596 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1597 
1598   Level: beginner
1599 
1600 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1601 M*/
/* Constructor for MATMPIDENSE: allocates the per-matrix data structure,
   installs the operation table, and registers all composed methods
   (array access, column vectors, submatrices, conversions, products). */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MPIDense operation table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array access and column-vector/submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions (some only when the corresponding package is configured) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-matrix product dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1670 
1671 /*MC
1672    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1673 
1674    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1675    and `MATMPIDENSE` otherwise.
1676 
1677    Options Database Key:
1678 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1679 
1680   Level: beginner
1681 
1682 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1683 M*/
1684 
1685 /*@C
1686   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1687 
1688   Collective
1689 
1690   Input Parameters:
1691 + B    - the matrix
1692 - data - optional location of matrix data.  Set to `NULL` for PETSc
1693    to control all matrix memory allocation.
1694 
1695   Level: intermediate
1696 
1697   Notes:
1698   The dense format is fully compatible with standard Fortran
1699   storage by columns.
1700 
1701   The data input variable is intended primarily for Fortran programmers
1702   who wish to allocate their own matrix memory space.  Most users should
1703   set `data` to `NULL`.
1704 
1705 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1706 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op if B is not an MPIDense-based type */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1714 
1715 /*@
1716   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1717   array provided by the user. This is useful to avoid copying an array
1718   into a matrix
1719 
1720   Not Collective
1721 
1722   Input Parameters:
1723 + mat   - the matrix
1724 - array - the array in column major order
1725 
1726   Level: developer
1727 
1728   Note:
1729   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1730   freed when the matrix is destroyed.
1731 
1732 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1733           `MatDenseReplaceArray()`
1734 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not support placing an array */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* values changed from the caller's view */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the placed array is host memory, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1746 
1747 /*@
1748   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1749 
1750   Not Collective
1751 
1752   Input Parameter:
1753 . mat - the matrix
1754 
1755   Level: developer
1756 
1757   Note:
1758   You can only call this after a call to `MatDensePlaceArray()`
1759 
1760 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1761 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not support resetting the array */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* values changed from the caller's view */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1770 
1771 /*@
1772   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1773   array provided by the user. This is useful to avoid copying an array
1774   into a matrix
1775 
1776   Not Collective
1777 
1778   Input Parameters:
1779 + mat   - the matrix
1780 - array - the array in column major order
1781 
1782   Level: developer
1783 
1784   Note:
1785   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1786   freed by the user. It will be freed when the matrix is destroyed.
1787 
1788 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1789 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod: errors if the matrix type does not support replacing the array */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* values changed from the caller's view */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array is host memory, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1801 
1802 /*@C
1803   MatCreateDense - Creates a matrix in `MATDENSE` format.
1804 
1805   Collective
1806 
1807   Input Parameters:
1808 + comm - MPI communicator
1809 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1810 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1811 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1812 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1813 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1814    to control all matrix memory allocation.
1815 
1816   Output Parameter:
1817 . A - the matrix
1818 
1819   Level: intermediate
1820 
1821   Notes:
1822   The dense format is fully compatible with standard Fortran
1823   storage by columns.
1824 
1825   Although local portions of the matrix are stored in column-major
1826   order, the matrix is partitioned across MPI ranks by row.
1827 
1828   The data input variable is intended primarily for Fortran programmers
1829   who wish to allocate their own matrix memory space.  Most users should
1830   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1831 
1832   The user MUST specify either the local or global matrix dimensions
1833   (possibly both).
1834 
1835 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1836 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  /* MATDENSE resolves to MATSEQDENSE or MATMPIDENSE depending on comm size */
  PetscCall(MatSetType(*A, MATDENSE));
  /* call both preallocation variants; only the one matching the actual type takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1847 
1848 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1849 {
1850   Mat           mat;
1851   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1852 
1853   PetscFunctionBegin;
1854   *newmat = NULL;
1855   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1856   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1857   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1858   a = (Mat_MPIDense *)mat->data;
1859 
1860   mat->factortype   = A->factortype;
1861   mat->assembled    = PETSC_TRUE;
1862   mat->preallocated = PETSC_TRUE;
1863 
1864   mat->insertmode = NOT_SET_VALUES;
1865   a->donotstash   = oldmat->donotstash;
1866 
1867   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1868   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1869 
1870   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1871 
1872   *newmat = mat;
1873   PetscFunctionReturn(PETSC_SUCCESS);
1874 }
1875 
/* Load an MPIDense matrix from a viewer; only binary and (if configured)
   HDF5 viewers are supported. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1901 
1902 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1903 {
1904   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1905   Mat           a, b;
1906 
1907   PetscFunctionBegin;
1908   a = matA->A;
1909   b = matB->A;
1910   PetscCall(MatEqual(a, b, flag));
1911   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1916 {
1917   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1918 
1919   PetscFunctionBegin;
1920   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1921   PetscCall(MatDestroy(&atb->atb));
1922   PetscCall(PetscFree(atb));
1923   PetscFunctionReturn(PETSC_SUCCESS);
1924 }
1925 
1926 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1927 {
1928   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1929 
1930   PetscFunctionBegin;
1931   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1932   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1933   PetscCall(PetscFree(abt));
1934   PetscFunctionReturn(PETSC_SUCCESS);
1935 }
1936 
/* Numeric phase of C = A^T * B for MPIDense matrices.
   Each rank computes its local contribution aseq^T * bseq (full C-sized),
   packs it by destination-rank row blocks, and an MPI_Reduce_scatter sums the
   contributions while delivering each rank exactly its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* sendbuf and recvcounts were sized in the symbolic phase */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its row block
     of every column, contiguously (column-major within the block) */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  /* all entries are already local; assembly is a formality */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1982 
/* Symbolic phase of C = A^T * B for MPIDense matrices: size and type C,
   and allocate the reduce-scatter buffers reused by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  /* NOTE(review): if both CUDA and HIP are enabled, the second TypeCompareAny
     overwrites the first result — confirm builds enable at most one backend */
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C: full-size send buffer and per-rank
     receive counts for the MPI_Reduce_scatter in the numeric phase */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2024 
/* Symbolic phase of C = A * B^T for MPIDense matrices: size and type C, pick
   the communication algorithm ("allgatherv" or "cyclic"), and allocate the
   buffers the numeric phase reuses. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A*B^T requires matching column distributions */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv" (default), alg 1 = "cyclic" ring exchange */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag for the cyclic algorithm's point-to-point messages */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — local pack buffer plus a buffer for all of B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    /* displacements use A->cmap->N; equal to B->cmap->N given the local-column check above */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2076 
/*
  Computes C = A * B^T for MPIDENSE A, B, C with a ring ("cyclic") exchange:
  each rank repeatedly multiplies its local rows of A against one rank's rows
  of B while the next rank's rows of B are in flight, overlapping the BLAS
  gemm with the MPI transfer.  Requires the product data created by the
  symbolic phase (Mat_MatTransMultDense with alg == 1 and buf[0]/buf[1]
  sized for the largest local row block of B times A->cmap->N).
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* raw arrays and leading dimensions of the local dense blocks */
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* the send buffer must be contiguous (lda == local rows); otherwise pack
     the local block of B column by column into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so the gemm below can still read sendbuf */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* values were written directly into the local block, so assembly moves no data */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2160 
2161 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2162 {
2163   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2164   Mat_MatTransMultDense *abt;
2165   MPI_Comm               comm;
2166   PetscMPIInt            size;
2167   PetscScalar           *cv, *sendbuf, *recvbuf;
2168   const PetscScalar     *av, *bv;
2169   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2170   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2171   PetscBLASInt           cm, cn, ck, alda, clda;
2172 
2173   PetscFunctionBegin;
2174   MatCheckProduct(C, 3);
2175   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2176   abt = (Mat_MatTransMultDense *)C->product->data;
2177   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2178   PetscCallMPI(MPI_Comm_size(comm, &size));
2179   PetscCall(MatDenseGetArrayRead(a->A, &av));
2180   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2181   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2182   PetscCall(MatDenseGetLDA(a->A, &i));
2183   PetscCall(PetscBLASIntCast(i, &alda));
2184   PetscCall(MatDenseGetLDA(b->A, &blda));
2185   PetscCall(MatDenseGetLDA(c->A, &i));
2186   PetscCall(PetscBLASIntCast(i, &clda));
2187   /* copy transpose of B into buf[0] */
2188   bn      = B->rmap->n;
2189   sendbuf = abt->buf[0];
2190   recvbuf = abt->buf[1];
2191   for (k = 0, j = 0; j < bn; j++) {
2192     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2193   }
2194   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2195   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2196   PetscCall(PetscBLASIntCast(cK, &ck));
2197   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2198   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2199   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2200   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2201   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2202   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2203   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2204   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2205   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2206   PetscFunctionReturn(PETSC_SUCCESS);
2207 }
2208 
2209 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2210 {
2211   Mat_MatTransMultDense *abt;
2212 
2213   PetscFunctionBegin;
2214   MatCheckProduct(C, 3);
2215   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2216   abt = (Mat_MatTransMultDense *)C->product->data;
2217   switch (abt->alg) {
2218   case 1:
2219     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2220     break;
2221   default:
2222     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2223     break;
2224   }
2225   PetscFunctionReturn(PETSC_SUCCESS);
2226 }
2227 
2228 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2229 {
2230   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2231 
2232   PetscFunctionBegin;
2233   PetscCall(MatDestroy(&ab->Ce));
2234   PetscCall(MatDestroy(&ab->Ae));
2235   PetscCall(MatDestroy(&ab->Be));
2236   PetscCall(PetscFree(ab));
2237   PetscFunctionReturn(PETSC_SUCCESS);
2238 }
2239 
/*
  Numeric phase of C = A * B for MPIDENSE operands.  Two paths, selected by
  what the symbolic phase stored in the product data:
  - Elemental path (ab->Ae and ab->Ce set): convert A and B to MATELEMENTAL,
    multiply there, and convert the result back into C;
  - "petsc" path: broadcast the needed rows of B to every rank via a PetscSF
    (one bcast per column of C) into the sequential matrix ab->Be, then do a
    local sequential dense multiply.
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* refresh the Elemental copies with the current values of A and B */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* column by column: gather the rows of B needed by this rank into Be */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* local rows of A times the gathered Be give the local block of C */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)(A->data))->A, ab->Be, ((Mat_MPIDense *)(C->data))->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2277 
2278 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2279 {
2280   Mat_Product      *product = C->product;
2281   PetscInt          alg;
2282   Mat_MatMultDense *ab;
2283   PetscBool         flg;
2284 
2285   PetscFunctionBegin;
2286   MatCheckProduct(C, 4);
2287   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2288   /* check local size of A and B */
2289   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2290              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2291 
2292   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2293   alg = flg ? 0 : 1;
2294 
2295   /* setup C */
2296   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2297   PetscCall(MatSetType(C, MATMPIDENSE));
2298   PetscCall(MatSetUp(C));
2299 
2300   /* create data structure for reuse Cdense */
2301   PetscCall(PetscNew(&ab));
2302 
2303   switch (alg) {
2304   case 1: /* alg: "elemental" */
2305 #if PetscDefined(HAVE_ELEMENTAL)
2306     /* create elemental matrices Ae and Be */
2307     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2308     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2309     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2310     PetscCall(MatSetUp(ab->Ae));
2311     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2312 
2313     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2314     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2315     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2316     PetscCall(MatSetUp(ab->Be));
2317     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2318 
2319     /* compute symbolic Ce = Ae*Be */
2320     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2321     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2322 #else
2323     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2324 #endif
2325     break;
2326   default: /* alg: "petsc" */
2327     ab->Ae = NULL;
2328     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2329     ab->Ce = NULL;
2330     break;
2331   }
2332 
2333   C->product->data       = ab;
2334   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2335   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2336   PetscFunctionReturn(PETSC_SUCCESS);
2337 }
2338 
2339 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2340 {
2341   Mat_Product *product     = C->product;
2342   const char  *algTypes[2] = {"petsc", "elemental"};
2343   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2344   PetscBool    flg = PETSC_FALSE;
2345   PetscFunctionBegin;
2346 
2347   /* Set default algorithm */
2348   alg = 0; /* default is petsc */
2349   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2350   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2351 
2352   /* Get runtime option */
2353   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2354   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2355   PetscOptionsEnd();
2356   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2357 
2358   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2359   C->ops->productsymbolic = MatProductSymbolic_AB;
2360   PetscFunctionReturn(PETSC_SUCCESS);
2361 }
2362 
2363 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2364 {
2365   Mat_Product *product = C->product;
2366   Mat          A = product->A, B = product->B;
2367 
2368   PetscFunctionBegin;
2369   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2370              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2371   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2372   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 
2376 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2377 {
2378   Mat_Product *product     = C->product;
2379   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2380   PetscInt     alg, nalg = 2;
2381   PetscBool    flg = PETSC_FALSE;
2382 
2383   PetscFunctionBegin;
2384   /* Set default algorithm */
2385   alg = 0; /* default is allgatherv */
2386   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2387   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2388 
2389   /* Get runtime option */
2390   if (product->api_user) {
2391     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2392     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2393     PetscOptionsEnd();
2394   } else {
2395     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2396     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2397     PetscOptionsEnd();
2398   }
2399   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2400 
2401   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2402   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2403   PetscFunctionReturn(PETSC_SUCCESS);
2404 }
2405 
2406 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2407 {
2408   Mat_Product *product = C->product;
2409 
2410   PetscFunctionBegin;
2411   switch (product->type) {
2412   case MATPRODUCT_AB:
2413     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2414     break;
2415   case MATPRODUCT_AtB:
2416     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2417     break;
2418   case MATPRODUCT_ABt:
2419     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2420     break;
2421   default:
2422     break;
2423   }
2424   PetscFunctionReturn(PETSC_SUCCESS);
2425 }
2426