xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision fbf9dbe564678ed6eff1806adbc4c4f01b9743f4)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4    Portions of this code are under:
5    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
6 */
7 
8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
9 #include <../src/mat/impls/aij/mpi/mpiaij.h>
10 #include <petscblaslapack.h>
11 
/*@
      MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the sequential or MPI `MATDENSE` matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  /* For the MPI type the local representation is the inner sequential matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    /* A sequential dense matrix is its own local representation; anything else is unsupported */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
43 
44 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
/* Return (and cache on A) a sequential matrix aliasing the diagonal block of A */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* The diagonal block is only well-defined when row and column layouts agree */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  /* Cache the block on A so repeated calls return the same object */
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the storage of the diagonal block inside mdn->A (no copy is made).
       NOTE(review): the offset m*rstart assumes the inner matrix has lda == m;
       confirm this holds for matrices with a user-set leading dimension */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* The compose above took a reference; drop ours so A owns the cached block */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/* Insert/add a logically dense m x n block of values; off-process rows are stashed
   and communicated during MatAssemblyBegin/End */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: insert directly into the sequential inner matrix */
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: v is m x n column-major, entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash for later communication; matrix is no longer assembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
163 
/* Retrieve an m x n block of values into v (row-major); only locally owned rows are supported */
PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global to local row index */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        /* output is row-major: entry (i,j) of the requested block goes to v[i*n + j] */
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Set the leading dimension of the local storage, lazily creating the inner
   sequential matrix (of matching CPU/GPU type) if it does not exist yet */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE; /* default inner type when no GPU variant applies */

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* The inner matrix holds all global columns for the locally owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
/* Extract the submatrix A(isrow, iscol); the row partitioning of the result must
   match that of A (no parallel redistribution is performed) */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* Every process needs all requested column indices since it stores all columns of its rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at the start of source column icol[i]; gather the selected rows.
       The destination is written contiguously, i.e. its lda equals nrows */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
/* Receive stashed off-process values, insert them locally, then assemble the inner matrix */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 means no more incoming stash messages */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* Finish assembly of the local sequential block */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
426 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
/* Zero the given (global) rows, optionally placing diag on the diagonal and
   fixing the right-hand side b so that b = diag*x on those rows */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the local rows; the diagonal is set afterwards for the whole matrix */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* MatDiagonalSet handles the parallel layout of the diagonal */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
/* yy = A*xx: broadcast the full input vector into the work vector lvec via the
   star-forest, then multiply with the local sequential block */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern (Mvctx) and work vector lvec */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  /* Memtype-aware access so host and device arrays are both handled */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* Local dense multiply on the gathered vector */
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
490 
/* zz = yy + A*xx: same gather as MatMult_MPIDense, then a local multiply-add */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern (Mvctx) and work vector lvec */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* Local dense multiply-add on the gathered vector */
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
509 
/* yy = A^T*xx: local transpose multiply into lvec, then sum-reduce the partial
   contributions from all processes into yy via the star-forest */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* yy accumulates reduced contributions, so it must start from zero */
  PetscCall(VecSet(yy, 0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
529 
/* zz = yy + A^T*xx: seed zz with yy, then sum-reduce the local transpose-multiply
   contributions into zz via the star-forest */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* Start from yy so the reduction adds A^T*xx on top of it */
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551 {
552   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
554   PetscScalar       *x, zero = 0.0;
555   const PetscScalar *av;
556 
557   PetscFunctionBegin;
558   PetscCall(VecSet(v, zero));
559   PetscCall(VecGetArray(v, &x));
560   PetscCall(VecGetSize(v, &n));
561   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
562   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
563   radd = A->rmap->rstart * m;
564   PetscCall(MatDenseGetArrayRead(a->A, &av));
565   PetscCall(MatDenseGetLDA(a->A, &lda));
566   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568   PetscCall(VecRestoreArray(v, &x));
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
/* Destroy the parallel dense matrix: free the inner objects and clear every
   composed method so a later MatSetType() starts from a clean slate */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* Destroying with outstanding column-vector/submatrix accesses is a usage error */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* Remove every method composed by MatCreate_MPIDense (and GPU variants) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* Drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
668 
669 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
670 
671 #include <petscdraw.h>
/*
  Viewer backend for MATMPIDENSE handling ASCII, draw, and socket viewers.

  For ASCII INFO/INFO_DETAIL formats only summary data is printed and the
  routine returns early; otherwise the whole parallel matrix is gathered onto
  rank 0 as a temporary MATMPIDENSE and viewed there through a sub-viewer.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    /* NOTE(review): vtype is fetched but never used below — candidate for removal */
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local rows, nonzeros, allocation, memory */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      /* also show the communication pattern used for MatMult, if it exists */
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* nothing to do when drawing is disabled (null draw device) */
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all rows of the gathered copy; other ranks own none */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* view the rank-0 local block of the gathered copy through a serial sub-viewer */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
753 
754 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
755 {
756   PetscBool iascii, isbinary, isdraw, issocket;
757 
758   PetscFunctionBegin;
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
761   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
762   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
763 
764   if (iascii || issocket || isdraw) {
765     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
766   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
767   PetscFunctionReturn(PETSC_SUCCESS);
768 }
769 
770 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
771 {
772   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
773   Mat            mdn = mat->A;
774   PetscLogDouble isend[5], irecv[5];
775 
776   PetscFunctionBegin;
777   info->block_size = 1.0;
778 
779   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
780 
781   isend[0] = info->nz_used;
782   isend[1] = info->nz_allocated;
783   isend[2] = info->nz_unneeded;
784   isend[3] = info->memory;
785   isend[4] = info->mallocs;
786   if (flag == MAT_LOCAL) {
787     info->nz_used      = isend[0];
788     info->nz_allocated = isend[1];
789     info->nz_unneeded  = isend[2];
790     info->memory       = isend[3];
791     info->mallocs      = isend[4];
792   } else if (flag == MAT_GLOBAL_MAX) {
793     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
794 
795     info->nz_used      = irecv[0];
796     info->nz_allocated = irecv[1];
797     info->nz_unneeded  = irecv[2];
798     info->memory       = irecv[3];
799     info->mallocs      = irecv[4];
800   } else if (flag == MAT_GLOBAL_SUM) {
801     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
802 
803     info->nz_used      = irecv[0];
804     info->nz_allocated = irecv[1];
805     info->nz_unneeded  = irecv[2];
806     info->memory       = irecv[3];
807     info->mallocs      = irecv[4];
808   }
809   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
810   info->fill_ratio_needed = 0;
811   info->factor_mallocs    = 0;
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
/*
  Sets options on a MATMPIDENSE matrix. Some options are forwarded to the
  local sequential block, some are recorded in the parallel wrapper, and the
  remainder are logged and ignored (they have no meaning for dense storage).
*/
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to the local sequential block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* recorded on the parallel wrapper AND forwarded to the local block */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  /* meaningless for dense storage; logged and ignored */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* affects only the parallel assembly path (value stashing) */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
858 
/*
  Computes A = diag(ll) * A * diag(rr) in place; either vector may be NULL
  to skip that scaling.

  Left scaling uses the locally owned entries of ll directly (row layout of
  A matches ll). Right scaling needs every locally stored column's scale
  factor, so rr is broadcast into the local work vector lvec via the
  matrix's PetscSF before the column sweep.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      /* row i lives at stride lda in the column-major local array */
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* the SF that gathers rr's distributed entries is built lazily */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      /* column i is contiguous in the column-major local array */
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
907 
/*
  Computes the Frobenius, 1-, or infinity norm of a MATMPIDENSE matrix.
  The single-process case delegates entirely to the sequential block;
  otherwise per-process partial results are combined with an Allreduce
  (SUM for Frobenius/1-norm column sums, MAX for the infinity row norm).

  NOTE(review): the Frobenius and 1-norm loops walk cmap->n * rmap->n
  consecutive entries of the local array, which assumes the local leading
  dimension equals the local row count (no lda padding) — confirm against
  MatDenseGetLDA for matrices created with a non-default lda.
*/
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block, then global sum and sqrt */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* per-column absolute sums, globally summed, then the max column sum */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so local row norms reduce with MAX */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
957 
/*
  Forms the transpose of a MATMPIDENSE matrix.

  A new matrix with swapped local/global sizes is created for
  MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX; MAT_REUSE_MATRIX reuses
  *matout. Columns of the local block become scattered rows of B through
  MatSetValues, so assembly handles the interprocess redistribution. For
  MAT_INPLACE_MATRIX the result is merged back into A's header.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices owned by this process */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* local column j (contiguous, length m) becomes global row j of B */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B, destroying B's shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
996 
997 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
998 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
999 
1000 PetscErrorCode MatSetUp_MPIDense(Mat A)
1001 {
1002   PetscFunctionBegin;
1003   PetscCall(PetscLayoutSetUp(A->rmap));
1004   PetscCall(PetscLayoutSetUp(A->cmap));
1005   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1006   PetscFunctionReturn(PETSC_SUCCESS);
1007 }
1008 
1009 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1010 {
1011   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1012 
1013   PetscFunctionBegin;
1014   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1015   PetscFunctionReturn(PETSC_SUCCESS);
1016 }
1017 
1018 PetscErrorCode MatConjugate_MPIDense(Mat mat)
1019 {
1020   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1021 
1022   PetscFunctionBegin;
1023   PetscCall(MatConjugate(a->A));
1024   PetscFunctionReturn(PETSC_SUCCESS);
1025 }
1026 
1027 PetscErrorCode MatRealPart_MPIDense(Mat A)
1028 {
1029   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1030 
1031   PetscFunctionBegin;
1032   PetscCall(MatRealPart(a->A));
1033   PetscFunctionReturn(PETSC_SUCCESS);
1034 }
1035 
1036 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1037 {
1038   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1039 
1040   PetscFunctionBegin;
1041   PetscCall(MatImaginaryPart(a->A));
1042   PetscFunctionReturn(PETSC_SUCCESS);
1043 }
1044 
1045 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1046 {
1047   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1048 
1049   PetscFunctionBegin;
1050   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1051   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1052   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1053   PetscFunctionReturn(PETSC_SUCCESS);
1054 }
1055 
1056 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1057 
/*
  Computes per-column reductions (norms, sums, or means) of a MATMPIDENSE
  matrix into reductions[] (length = global column count).

  `type` deliberately mixes NormType and MatReductionType values, so the
  comparisons below against NORM_* and REDUCTION_* constants are intended.
  Means are computed as global sums divided by the global row count;
  2-norms are squared locally, summed globally, then square-rooted.
*/
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  /* means are obtained from local sums; division by m happens after the reduction */
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  /* square local 2-norms so they can be combined with a global SUM */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1090 
1091 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1092 {
1093   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1094 
1095   PetscFunctionBegin;
1096   PetscCall(MatSetRandom(d->A, rctx));
1097 #if defined(PETSC_HAVE_DEVICE)
1098   x->offloadmask = d->A->offloadmask;
1099 #endif
1100   PetscFunctionReturn(PETSC_SUCCESS);
1101 }
1102 
1103 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1104 {
1105   PetscFunctionBegin;
1106   *missing = PETSC_FALSE;
1107   PetscFunctionReturn(PETSC_SUCCESS);
1108 }
1109 
1110 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1111 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1112 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1113 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1114 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1115 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1116 
/*
  Function dispatch table for MATMPIDENSE. Slot positions correspond to the
  fields of struct _MatOps (the /\* nn *\/ markers track the slot index);
  NULL slots mean the operation is unsupported or provided elsewhere (e.g.
  composed functions or MatProduct).
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1269 
/*
  Allocates the local storage of a MATMPIDENSE matrix.

  `data` may be NULL (PETSc allocates and owns the array) or a user array
  of size local-rows x global-columns. The local block's sequential type is
  matched to the parallel type (CUDA/HIP variants select the corresponding
  device sequential type). On return the matrix is marked preallocated and
  assembled.
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block is local-rows by GLOBAL columns: each process stores full rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* mirror the local block's host/device residency on the wrapper */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1302 
/*
  Converts a MATMPIAIJ matrix to MATMPIDENSE.

  The local rows of A (diagonal + off-diagonal parts merged) are densified
  sequentially, then the per-process dense blocks are concatenated into a
  parallel dense matrix with A's column layout. Handles all three MatReuse
  modes; MAT_INPLACE_MATRIX replaces A's header with the result.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  /* C doubles as the output handle: non-NULL only when reusing an existing matrix */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1321 
/*
  Converts a MATMPIDENSE (or MATSEQDENSE) matrix to MATMPIAIJ.

  Mirrors MatConvert_MPIAIJ_MPIDense: the local dense block is sparsified
  sequentially, then the per-process pieces are concatenated into a
  parallel AIJ matrix with A's column layout. Handles all three MatReuse
  modes; MAT_INPLACE_MATRIX replaces A's header with the result.
*/
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  /* C doubles as the output handle: non-NULL only when reusing an existing matrix */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1339 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
  Converts a MATMPIDENSE matrix to MATELEMENTAL.

  Each process inserts its full locally owned rows (all global columns)
  into the Elemental matrix. Because the PETSc-Elemental interface only
  supports ADD_VALUES for off-process insertion, the target is zeroed
  first when reused. MAT_INPLACE_MATRIX replaces A's header.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* must zero: entries are accumulated with ADD_VALUES below */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* column-oriented insertion matches the dense column-major local storage */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global indices of the locally owned rows, and all column indices */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1379 
1380 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1381 {
1382   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1383 
1384   PetscFunctionBegin;
1385   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1386   PetscFunctionReturn(PETSC_SUCCESS);
1387 }
1388 
1389 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1390 {
1391   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1392 
1393   PetscFunctionBegin;
1394   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1395   PetscFunctionReturn(PETSC_SUCCESS);
1396 }
1397 
/*
  Builds (or refills) a parallel MATMPIDENSE matrix whose local block on
  each process is a copy of that process's sequential matrix `inmat`.

  `n` is the requested local column count of the result (PETSC_DECIDE lets
  PETSc split inmat's global column count N); the local column counts must
  sum to N across `comm`. With MAT_REUSE_MATRIX only the numeric copy into
  the existing *outmat is performed.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    /* every value is inserted locally, so assembly needs no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1423 
/* Expose global column col of A as a Vec that aliases the local column storage
   (read-write access). Only one column vector and one submatrix may be checked
   out at a time; vecinuse stores col+1 so that 0 means "nothing outstanding". */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the column vector is created lazily on first use and cached on the matrix */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* ptrinuse keeps the array pointer so the matching Restore can return it */
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local storage */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1440 
1441 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1442 {
1443   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1444 
1445   PetscFunctionBegin;
1446   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1447   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1448   a->vecinuse = 0;
1449   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1450   PetscCall(VecResetArray(a->cvec));
1451   if (v) *v = NULL;
1452   PetscFunctionReturn(PETSC_SUCCESS);
1453 }
1454 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): the returned Vec
   aliases column col and is placed in a read-locked state so callers cannot
   modify it. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* read access: does not mark the matrix values as modified */
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  /* lock the Vec so write accesses on it error out until the restore */
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1472 
/* Return the read-only column vector: release the read array, pop the read
   lock pushed by the Get call, and detach the array from the cached Vec. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  /* undo the lock before resetting, mirroring the order of the Get call */
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1487 
/* Write-only variant of MatDenseGetColumnVec_MPIDense(): the returned Vec
   aliases column col; existing values need not be valid/synchronized since
   the caller promises to overwrite them. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* write access: skips any host/device synchronization of current values */
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1504 
/* Return the write-only column vector obtained from
   MatDenseGetColumnVecWrite_MPIDense() and clear the checkout flag. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1518 
/* Expose the contiguous submatrix A[rbegin:rend, cbegin:cend] as a matrix that
   aliases A's storage. The wrapper matrix a->cmat is cached across calls and
   its layouts are rebuilt only when the requested sizes change. Exactly one
   submatrix may be outstanding at a time (tracked via matinuse). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the global row range [rbegin,rend) to this rank's locally owned rows,
     expressed as local indices into a->A */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached wrapper with the requested layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse: rebuild the row layout only if global or local row counts changed;
       the local-size comparison must agree on all ranks, hence the allreduce */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* c->A non-NULL would mean a previous checkout was never restored */
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the wrapper's local block aliases the corresponding window of a->A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  /* matinuse stores cbegin+1 so 0 can mean "no submatrix checked out" */
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1576 
/* Return the submatrix handed out by MatDenseGetSubMatrix_MPIDense(): releases
   the inner aliasing block (setting c->A back to NULL, which re-arms the
   checkout check in the Get call) and clears the caller's reference. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the submatrix may have been touched on host or device; propagate the
     inner block's offload state back to A */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1595 
1596 /*MC
1597    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1598 
1599    Options Database Key:
1600 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1601 
1602   Level: beginner
1603 
1604 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1605 M*/
/* Constructor for MATMPIDENSE: allocates the per-matrix data structure,
   installs the operations table, and composes the type-specific methods that
   generic Mat routines look up by name. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column/submatrix checkout methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions, guarded by optional-package availability below */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* MatProduct dispatch for AIJ (and device AIJ) combinations with dense */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1674 
1675 /*MC
1676    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1677 
1678    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1679    and `MATMPIDENSE` otherwise.
1680 
1681    Options Database Key:
1682 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1683 
1684   Level: beginner
1685 
1686 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1687 M*/
1688 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data.  Set to `NULL` for PETSc
   to control all matrix memory allocation.

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set `data` to `NULL`.

   Calling this on a matrix that is not of type `MATMPIDENSE` is a no-op.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently ignored if B does not compose this method
     (e.g. a sequential dense matrix) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1718 
1719 /*@
1720    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1721    array provided by the user. This is useful to avoid copying an array
1722    into a matrix
1723 
1724    Not Collective
1725 
1726    Input Parameters:
1727 +  mat - the matrix
1728 -  array - the array in column major order
1729 
1730    Level: developer
1731 
1732    Note:
1733    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1734    freed when the matrix is destroyed.
1735 
1736 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1737           `MatDenseReplaceArray()`
1738 @*/
1739 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1740 {
1741   PetscFunctionBegin;
1742   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1743   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1744   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1745 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1746   mat->offloadmask = PETSC_OFFLOAD_CPU;
1747 #endif
1748   PetscFunctionReturn(PETSC_SUCCESS);
1749 }
1750 
1751 /*@
1752    MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1753 
1754    Not Collective
1755 
1756    Input Parameter:
1757 .  mat - the matrix
1758 
1759    Level: developer
1760 
1761    Note:
1762    You can only call this after a call to `MatDensePlaceArray()`
1763 
1764 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1765 @*/
1766 PetscErrorCode MatDenseResetArray(Mat mat)
1767 {
1768   PetscFunctionBegin;
1769   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1770   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1771   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1772   PetscFunctionReturn(PETSC_SUCCESS);
1773 }
1774 
1775 /*@
1776    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1777    array provided by the user. This is useful to avoid copying an array
1778    into a matrix
1779 
1780    Not Collective
1781 
1782    Input Parameters:
1783 +  mat - the matrix
1784 -  array - the array in column major order
1785 
1786    Level: developer
1787 
1788    Note:
1789    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1790    freed by the user. It will be freed when the matrix is destroyed.
1791 
1792 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1793 @*/
1794 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1795 {
1796   PetscFunctionBegin;
1797   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1798   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1799   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1800 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1801   mat->offloadmask = PETSC_OFFLOAD_CPU;
1802 #endif
1803   PetscFunctionReturn(PETSC_SUCCESS);
1804 }
1805 
/*@C
   MatCreateDense - Creates a matrix in `MATDENSE` format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
-  data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   Although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  /* MATDENSE resolves to MATSEQDENSE on one rank, MATMPIDENSE otherwise */
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation calls are issued; each is a no-op unless the matrix
     has the matching (seq or mpi) type */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1851 
1852 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1853 {
1854   Mat           mat;
1855   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1856 
1857   PetscFunctionBegin;
1858   *newmat = NULL;
1859   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1860   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1861   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1862   a = (Mat_MPIDense *)mat->data;
1863 
1864   mat->factortype   = A->factortype;
1865   mat->assembled    = PETSC_TRUE;
1866   mat->preallocated = PETSC_TRUE;
1867 
1868   mat->insertmode = NOT_SET_VALUES;
1869   a->donotstash   = oldmat->donotstash;
1870 
1871   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1872   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1873 
1874   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1875 
1876   *newmat = mat;
1877   PetscFunctionReturn(PETSC_SUCCESS);
1878 }
1879 
/* Load a parallel dense matrix from a viewer. Only binary and (when built with
   HDF5) HDF5 viewers are supported; any other viewer type is an error. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    /* the binary loader is shared between seq and mpi dense */
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1905 
1906 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1907 {
1908   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1909   Mat           a, b;
1910 
1911   PetscFunctionBegin;
1912   a = matA->A;
1913   b = matB->A;
1914   PetscCall(MatEqual(a, b, flag));
1915   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1916   PetscFunctionReturn(PETSC_SUCCESS);
1917 }
1918 
1919 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1920 {
1921   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1922 
1923   PetscFunctionBegin;
1924   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1925   PetscCall(MatDestroy(&atb->atb));
1926   PetscCall(PetscFree(atb));
1927   PetscFunctionReturn(PETSC_SUCCESS);
1928 }
1929 
1930 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1931 {
1932   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1933 
1934   PetscFunctionBegin;
1935   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1936   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1937   PetscCall(PetscFree(abt));
1938   PetscFunctionReturn(PETSC_SUCCESS);
1939 }
1940 
/* Numeric phase of C = A^T * B for MPI dense matrices. Each rank computes the
   full-size local contribution aseq^T * bseq, packs it by destination rank,
   and an MPI_Reduce_scatter sums the contributions while distributing each
   rank its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers were sized in the symbolic phase and live in the product context */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its owned rows
     of every column, contiguously — the layout MPI_Reduce_scatter expects */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1985 
1986 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1987 {
1988   MPI_Comm               comm;
1989   PetscMPIInt            size;
1990   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1991   Mat_TransMatMultDense *atb;
1992   PetscBool              cisdense = PETSC_FALSE;
1993   PetscInt               i;
1994   const PetscInt        *ranges;
1995 
1996   PetscFunctionBegin;
1997   MatCheckProduct(C, 4);
1998   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
1999   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2000   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2001     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2002   }
2003 
2004   /* create matrix product C */
2005   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2006 #if defined(PETSC_HAVE_CUDA)
2007   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2008 #endif
2009 #if defined(PETSC_HAVE_HIP)
2010   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2011 #endif
2012   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2013   PetscCall(MatSetUp(C));
2014 
2015   /* create data structure for reuse C */
2016   PetscCallMPI(MPI_Comm_size(comm, &size));
2017   PetscCall(PetscNew(&atb));
2018   cM = C->rmap->N;
2019   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2020   PetscCall(MatGetOwnershipRanges(C, &ranges));
2021   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2022 
2023   C->product->data    = atb;
2024   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sizes C and allocates
   the communication buffers for one of two numeric algorithms — "allgatherv"
   (every rank gathers all of B) or "cyclic" (B's row blocks are passed around
   the ring, double-buffered). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A*B^T needs matching (global) column spaces,
     and here also matching local column counts */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic ring messages from colliding with others */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two buffers, each big enough for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds the local block, buf[1] all of B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    /* displacements/counts in scalars: each rank contributes its rows of B times N columns */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2079 
/*
  C = A * B^T using a ring (cyclic) pipeline: each rank holds its local rows of B
  transposed in a send buffer, multiplies the local rows of A against the block it
  currently holds, and simultaneously forwards that block to the previous rank while
  receiving the next block from the following rank. After `size` steps every rank has
  multiplied against every row block of B. Buffers abt->buf[0]/buf[1] (sized in the
  symbolic phase) are ping-ponged between send and receive roles.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn; /* cK = global column count of A (= inner dimension of A*B^T) */
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense storage, as BLAS ints */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* If B's local storage is contiguous (lda == local rows) it can be sent as-is;
     otherwise pack it column-by-column into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to rank-1, receive from rank+1 (modulo size) */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the block currently in sendbuf originated on this rank */
  for (i = 0; i < size; i++) {
    /* the previous send/receive pair has completed, so both buffers may be reused */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* ping-pong between the two buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2162 
2163 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2164 {
2165   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2166   Mat_MatTransMultDense *abt;
2167   MPI_Comm               comm;
2168   PetscMPIInt            size;
2169   PetscScalar           *cv, *sendbuf, *recvbuf;
2170   const PetscScalar     *av, *bv;
2171   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2172   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2173   PetscBLASInt           cm, cn, ck, alda, clda;
2174 
2175   PetscFunctionBegin;
2176   MatCheckProduct(C, 3);
2177   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2178   abt = (Mat_MatTransMultDense *)C->product->data;
2179   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2180   PetscCallMPI(MPI_Comm_size(comm, &size));
2181   PetscCall(MatDenseGetArrayRead(a->A, &av));
2182   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2183   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2184   PetscCall(MatDenseGetLDA(a->A, &i));
2185   PetscCall(PetscBLASIntCast(i, &alda));
2186   PetscCall(MatDenseGetLDA(b->A, &blda));
2187   PetscCall(MatDenseGetLDA(c->A, &i));
2188   PetscCall(PetscBLASIntCast(i, &clda));
2189   /* copy transpose of B into buf[0] */
2190   bn      = B->rmap->n;
2191   sendbuf = abt->buf[0];
2192   recvbuf = abt->buf[1];
2193   for (k = 0, j = 0; j < bn; j++) {
2194     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2195   }
2196   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2197   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2198   PetscCall(PetscBLASIntCast(cK, &ck));
2199   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2200   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2201   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2202   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2203   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2204   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2205   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2206   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2207   PetscFunctionReturn(PETSC_SUCCESS);
2208 }
2209 
2210 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2211 {
2212   Mat_MatTransMultDense *abt;
2213 
2214   PetscFunctionBegin;
2215   MatCheckProduct(C, 3);
2216   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2217   abt = (Mat_MatTransMultDense *)C->product->data;
2218   switch (abt->alg) {
2219   case 1:
2220     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2221     break;
2222   default:
2223     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2224     break;
2225   }
2226   PetscFunctionReturn(PETSC_SUCCESS);
2227 }
2228 
2229 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2230 {
2231   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2232 
2233   PetscFunctionBegin;
2234   PetscCall(MatDestroy(&ab->Ce));
2235   PetscCall(MatDestroy(&ab->Ae));
2236   PetscCall(MatDestroy(&ab->Be));
2237   PetscCall(PetscFree(ab));
2238   PetscFunctionReturn(PETSC_SUCCESS);
2239 }
2240 
2241 #if defined(PETSC_HAVE_ELEMENTAL)
2242 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2243 {
2244   Mat_MatMultDense *ab;
2245 
2246   PetscFunctionBegin;
2247   MatCheckProduct(C, 3);
2248   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2249   ab = (Mat_MatMultDense *)C->product->data;
2250   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2251   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2252   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2253   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2254   PetscFunctionReturn(PETSC_SUCCESS);
2255 }
2256 
2257 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2258 {
2259   Mat               Ae, Be, Ce;
2260   Mat_MatMultDense *ab;
2261 
2262   PetscFunctionBegin;
2263   MatCheckProduct(C, 4);
2264   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2265   /* check local size of A and B */
2266   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2267     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2268   }
2269 
2270   /* create elemental matrices Ae and Be */
2271   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2272   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2273   PetscCall(MatSetType(Ae, MATELEMENTAL));
2274   PetscCall(MatSetUp(Ae));
2275   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2276 
2277   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2278   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2279   PetscCall(MatSetType(Be, MATELEMENTAL));
2280   PetscCall(MatSetUp(Be));
2281   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2282 
2283   /* compute symbolic Ce = Ae*Be */
2284   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2285   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2286 
2287   /* setup C */
2288   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2289   PetscCall(MatSetType(C, MATDENSE));
2290   PetscCall(MatSetUp(C));
2291 
2292   /* create data structure for reuse Cdense */
2293   PetscCall(PetscNew(&ab));
2294   ab->Ae = Ae;
2295   ab->Be = Be;
2296   ab->Ce = Ce;
2297 
2298   C->product->data       = ab;
2299   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2300   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2301   PetscFunctionReturn(PETSC_SUCCESS);
2302 }
2303 #endif
2304 
2305 #if defined(PETSC_HAVE_ELEMENTAL)
2306 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2307 {
2308   PetscFunctionBegin;
2309   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2310   C->ops->productsymbolic = MatProductSymbolic_AB;
2311   PetscFunctionReturn(PETSC_SUCCESS);
2312 }
2313 #endif
2314 
2315 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2316 {
2317   Mat_Product *product = C->product;
2318   Mat          A = product->A, B = product->B;
2319 
2320   PetscFunctionBegin;
2321   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2322     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2323   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2324   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2325   PetscFunctionReturn(PETSC_SUCCESS);
2326 }
2327 
/*
  Configure C = A*B^T: choose between the "allgatherv" (default) and "cyclic"
  algorithms, honoring the runtime option appropriate to how the product was
  requested (user API vs MatProduct interface), then install the symbolic callbacks.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name depends on whether the product was
     requested through the user-level API or the MatProduct interface */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  /* record the user's explicit choice (flg set only when the option was given) */
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2357 
2358 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2359 {
2360   Mat_Product *product = C->product;
2361 
2362   PetscFunctionBegin;
2363   switch (product->type) {
2364 #if defined(PETSC_HAVE_ELEMENTAL)
2365   case MATPRODUCT_AB:
2366     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2367     break;
2368 #endif
2369   case MATPRODUCT_AtB:
2370     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2371     break;
2372   case MATPRODUCT_ABt:
2373     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2374     break;
2375   default:
2376     break;
2377   }
2378   PetscFunctionReturn(PETSC_SUCCESS);
2379 }
2380