xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 0226ec35a99031c5bdd82a492055a25793359ffb)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4    Portions of this code are under:
5    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
6 */
7 
8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
9 #include <../src/mat/impls/aij/mpi/mpiaij.h>
10 #include <petscblaslapack.h>
11 
12 /*@
13       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the sequential or MPI `MATDENSE` matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27 {
28   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29   PetscBool     flg;
30 
31   PetscFunctionBegin;
32   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
33   PetscValidPointer(B, 2);
34   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35   if (flg) *B = mat->A;
36   else {
37     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39     *B = A;
40   }
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45 {
46   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48 
49   PetscFunctionBegin;
50   PetscCall(MatCopy(Amat->A, Bmat->A, s));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55 {
56   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58   PetscScalar  *v;
59 
60   PetscFunctionBegin;
61   PetscCall(MatDenseGetArray(mat->A, &v));
62   PetscCall(MatDenseGetLDA(mat->A, &lda));
63   rend2 = PetscMin(rend, A->cmap->N);
64   if (rend2 > rstart) {
65     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66     PetscCall(PetscLogFlops(rend2 - rstart));
67   }
68   PetscCall(MatDenseRestoreArray(mat->A, &v));
69   PetscFunctionReturn(PETSC_SUCCESS);
70 }
71 
72 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73 {
74   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76 
77   PetscFunctionBegin;
78   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79   lrow = row - rstart;
80   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85 {
86   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88 
89   PetscFunctionBegin;
90   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91   lrow = row - rstart;
92   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
97 {
98   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
99   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
100   PetscScalar  *array;
101   MPI_Comm      comm;
102   PetscBool     flg;
103   Mat           B;
104 
105   PetscFunctionBegin;
106   PetscCall(MatHasCongruentLayouts(A, &flg));
107   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
108   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
109   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
110 #if defined(PETSC_HAVE_CUDA)
111     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
112     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
113 #elif (PETSC_HAVE_HIP)
114     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
115     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
116 #endif
117     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
118     PetscCall(MatCreate(comm, &B));
119     PetscCall(MatSetSizes(B, m, m, m, m));
120     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
121     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
122     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
123     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
124     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
125     *a = B;
126     PetscCall(MatDestroy(&B));
127   } else *a = B;
128   PetscFunctionReturn(PETSC_SUCCESS);
129 }
130 
131 PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
132 {
133   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
134   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
135   PetscBool     roworiented = A->roworiented;
136 
137   PetscFunctionBegin;
138   for (i = 0; i < m; i++) {
139     if (idxm[i] < 0) continue;
140     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
141     if (idxm[i] >= rstart && idxm[i] < rend) {
142       row = idxm[i] - rstart;
143       if (roworiented) {
144         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
145       } else {
146         for (j = 0; j < n; j++) {
147           if (idxn[j] < 0) continue;
148           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
149           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
150         }
151       }
152     } else if (!A->donotstash) {
153       mat->assembled = PETSC_FALSE;
154       if (roworiented) {
155         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
156       } else {
157         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
158       }
159     }
160   }
161   PetscFunctionReturn(PETSC_SUCCESS);
162 }
163 
164 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165 {
166   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168 
169   PetscFunctionBegin;
170   for (i = 0; i < m; i++) {
171     if (idxm[i] < 0) continue; /* negative row */
172     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173     if (idxm[i] >= rstart && idxm[i] < rend) {
174       row = idxm[i] - rstart;
175       for (j = 0; j < n; j++) {
176         if (idxn[j] < 0) continue; /* negative column */
177         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179       }
180     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181   }
182   PetscFunctionReturn(PETSC_SUCCESS);
183 }
184 
185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186 {
187   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188 
189   PetscFunctionBegin;
190   PetscCall(MatDenseGetLDA(a->A, lda));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195 {
196   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
197   MatType       mtype = MATSEQDENSE;
198 
199   PetscFunctionBegin;
200   if (!a->A) {
201     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202     PetscCall(PetscLayoutSetUp(A->rmap));
203     PetscCall(PetscLayoutSetUp(A->cmap));
204     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206 #if defined(PETSC_HAVE_CUDA)
207     PetscBool iscuda;
208     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209     if (iscuda) mtype = MATSEQDENSECUDA;
210 #elif (PETSC_HAVE_HIP)
211     PetscBool iship;
212     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213     if (iship) mtype = MATSEQDENSEHIP;
214 #endif
215     PetscCall(MatSetType(a->A, mtype));
216   }
217   PetscCall(MatDenseSetLDA(a->A, lda));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222 {
223   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224 
225   PetscFunctionBegin;
226   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDenseGetArray(a->A, array));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232 {
233   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237   PetscCall(MatDenseGetArrayRead(a->A, array));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242 {
243   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244 
245   PetscFunctionBegin;
246   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247   PetscCall(MatDenseGetArrayWrite(a->A, array));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252 {
253   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254 
255   PetscFunctionBegin;
256   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258   PetscCall(MatDensePlaceArray(a->A, array));
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263 {
264   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265 
266   PetscFunctionBegin;
267   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269   PetscCall(MatDenseResetArray(a->A));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274 {
275   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276 
277   PetscFunctionBegin;
278   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280   PetscCall(MatDenseReplaceArray(a->A, array));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285 {
286   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
287   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288   const PetscInt    *irow, *icol;
289   const PetscScalar *v;
290   PetscScalar       *bv;
291   Mat                newmat;
292   IS                 iscol_local;
293   MPI_Comm           comm_is, comm_mat;
294 
295   PetscFunctionBegin;
296   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
299 
300   PetscCall(ISAllGather(iscol, &iscol_local));
301   PetscCall(ISGetIndices(isrow, &irow));
302   PetscCall(ISGetIndices(iscol_local, &icol));
303   PetscCall(ISGetLocalSize(isrow, &nrows));
304   PetscCall(ISGetLocalSize(iscol, &ncols));
305   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
306 
307   /* No parallel redistribution currently supported! Should really check each index set
308      to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
309      original matrix! */
310 
311   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
313 
314   /* Check submatrix call */
315   if (scall == MAT_REUSE_MATRIX) {
316     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
317     /* Really need to test rows and column sizes! */
318     newmat = *B;
319   } else {
320     /* Create and fill new matrix */
321     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325   }
326 
327   /* Now extract the data pointers and do the copy, column at a time */
328   newmatd = (Mat_MPIDense *)newmat->data;
329   PetscCall(MatDenseGetArray(newmatd->A, &bv));
330   PetscCall(MatDenseGetArrayRead(mat->A, &v));
331   PetscCall(MatDenseGetLDA(mat->A, &lda));
332   for (i = 0; i < Ncols; i++) {
333     const PetscScalar *av = v + lda * icol[i];
334     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335   }
336   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
338 
339   /* Assemble the matrices so that the correct flags are set */
340   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
342 
343   /* Free work space */
344   PetscCall(ISRestoreIndices(isrow, &irow));
345   PetscCall(ISRestoreIndices(iscol_local, &icol));
346   PetscCall(ISDestroy(&iscol_local));
347   *B = newmat;
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
351 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352 {
353   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354 
355   PetscFunctionBegin;
356   PetscCall(MatDenseRestoreArray(a->A, array));
357   PetscFunctionReturn(PETSC_SUCCESS);
358 }
359 
360 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361 {
362   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatDenseRestoreArrayRead(a->A, array));
366   PetscFunctionReturn(PETSC_SUCCESS);
367 }
368 
369 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370 {
371   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372 
373   PetscFunctionBegin;
374   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379 {
380   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381   PetscInt      nstash, reallocs;
382 
383   PetscFunctionBegin;
384   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385 
386   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
395   PetscInt      i, *row, *col, flg, j, rstart, ncols;
396   PetscMPIInt   n;
397   PetscScalar  *val;
398 
399   PetscFunctionBegin;
400   if (!mdn->donotstash && !mat->nooffprocentries) {
401     /*  wait on receives */
402     while (1) {
403       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
404       if (!flg) break;
405 
406       for (i = 0; i < n;) {
407         /* Now identify the consecutive vals belonging to the same row */
408         for (j = i, rstart = row[j]; j < n; j++) {
409           if (row[j] != rstart) break;
410         }
411         if (j < n) ncols = j - i;
412         else ncols = n - i;
413         /* Now assemble all these values with a single function call */
414         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
415         i = j;
416       }
417     }
418     PetscCall(MatStashScatterEnd_Private(&mat->stash));
419   }
420 
421   PetscCall(MatAssemblyBegin(mdn->A, mode));
422   PetscCall(MatAssemblyEnd(mdn->A, mode));
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427 {
428   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429 
430   PetscFunctionBegin;
431   PetscCall(MatZeroEntries(l->A));
432   PetscFunctionReturn(PETSC_SUCCESS);
433 }
434 
435 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436 {
437   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438   PetscInt      i, len, *lrows;
439 
440   PetscFunctionBegin;
441   /* get locally owned rows */
442   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443   /* fix right hand side if needed */
444   if (x && b) {
445     const PetscScalar *xx;
446     PetscScalar       *bb;
447 
448     PetscCall(VecGetArrayRead(x, &xx));
449     PetscCall(VecGetArrayWrite(b, &bb));
450     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451     PetscCall(VecRestoreArrayRead(x, &xx));
452     PetscCall(VecRestoreArrayWrite(b, &bb));
453   }
454   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455   if (diag != 0.0) {
456     Vec d;
457 
458     PetscCall(MatCreateVecs(A, NULL, &d));
459     PetscCall(VecSet(d, diag));
460     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461     PetscCall(VecDestroy(&d));
462   }
463   PetscCall(PetscFree(lrows));
464   PetscFunctionReturn(PETSC_SUCCESS);
465 }
466 
467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471 
472 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473 {
474   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475   const PetscScalar *ax;
476   PetscScalar       *ay;
477   PetscMemType       axmtype, aymtype;
478 
479   PetscFunctionBegin;
480   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
491 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
492 {
493   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494   const PetscScalar *ax;
495   PetscScalar       *ay;
496   PetscMemType       axmtype, aymtype;
497 
498   PetscFunctionBegin;
499   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
507   PetscFunctionReturn(PETSC_SUCCESS);
508 }
509 
510 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
511 {
512   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
513   const PetscScalar *ax;
514   PetscScalar       *ay;
515   PetscMemType       axmtype, aymtype;
516 
517   PetscFunctionBegin;
518   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
519   PetscCall(VecSet(yy, 0.0));
520   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
521   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
522   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
523   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
524   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
525   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
526   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
531 {
532   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
533   const PetscScalar *ax;
534   PetscScalar       *ay;
535   PetscMemType       axmtype, aymtype;
536 
537   PetscFunctionBegin;
538   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
539   PetscCall(VecCopy(yy, zz));
540   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
541   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
542   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
543   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
544   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
545   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
546   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
547   PetscFunctionReturn(PETSC_SUCCESS);
548 }
549 
550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551 {
552   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
554   PetscScalar       *x, zero = 0.0;
555   const PetscScalar *av;
556 
557   PetscFunctionBegin;
558   PetscCall(VecSet(v, zero));
559   PetscCall(VecGetArray(v, &x));
560   PetscCall(VecGetSize(v, &n));
561   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
562   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
563   radd = A->rmap->rstart * m;
564   PetscCall(MatDenseGetArrayRead(a->A, &av));
565   PetscCall(MatDenseGetLDA(a->A, &lda));
566   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568   PetscCall(VecRestoreArray(v, &x));
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
572 PetscErrorCode MatDestroy_MPIDense(Mat mat)
573 {
574   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
575 
576   PetscFunctionBegin;
577 #if defined(PETSC_USE_LOG)
578   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
579 #endif
580   PetscCall(MatStashDestroy_Private(&mat->stash));
581   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
582   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
583   PetscCall(MatDestroy(&mdn->A));
584   PetscCall(VecDestroy(&mdn->lvec));
585   PetscCall(PetscSFDestroy(&mdn->Mvctx));
586   PetscCall(VecDestroy(&mdn->cvec));
587   PetscCall(MatDestroy(&mdn->cmat));
588 
589   PetscCall(PetscFree(mat->data));
590   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
591 
592   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
593   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
594   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
596   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
597   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
598   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
599   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
600   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
601   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
602   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
603   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
604   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
605 #if defined(PETSC_HAVE_ELEMENTAL)
606   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
607 #endif
608 #if defined(PETSC_HAVE_SCALAPACK)
609   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
610 #endif
611   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
612   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
613   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
614 #if defined(PETSC_HAVE_CUDA)
615   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
616   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
617   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
618   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
619   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
620   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
621   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
622   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
623   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
624   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
625   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
626   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
627   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
628   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
629   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
630   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
631   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
632 #endif
633 #if defined(PETSC_HAVE_HIP)
634   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
635   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
636   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
637   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
638   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
639   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
640   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
641   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
642   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
643   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
644   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
645   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
646   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
647   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
648   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
649   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
650   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
651 #endif
652   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
662 
663   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664   PetscFunctionReturn(PETSC_SUCCESS);
665 }
666 
667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
668 
669 #include <petscdraw.h>
670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671 {
672   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
673   PetscMPIInt       rank;
674   PetscViewerType   vtype;
675   PetscBool         iascii, isdraw;
676   PetscViewer       sviewer;
677   PetscViewerFormat format;
678 
679   PetscFunctionBegin;
680   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683   if (iascii) {
684     PetscCall(PetscViewerGetType(viewer, &vtype));
685     PetscCall(PetscViewerGetFormat(viewer, &format));
686     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687       MatInfo info;
688       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691                                                    (PetscInt)info.memory));
692       PetscCall(PetscViewerFlush(viewer));
693       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695       PetscFunctionReturn(PETSC_SUCCESS);
696     } else if (format == PETSC_VIEWER_ASCII_INFO) {
697       PetscFunctionReturn(PETSC_SUCCESS);
698     }
699   } else if (isdraw) {
700     PetscDraw draw;
701     PetscBool isnull;
702 
703     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704     PetscCall(PetscDrawIsNull(draw, &isnull));
705     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706   }
707 
708   {
709     /* assemble the entire matrix onto first processor. */
710     Mat          A;
711     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712     PetscInt    *cols;
713     PetscScalar *vals;
714 
715     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716     if (rank == 0) {
717       PetscCall(MatSetSizes(A, M, N, M, N));
718     } else {
719       PetscCall(MatSetSizes(A, 0, 0, M, N));
720     }
721     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722     PetscCall(MatSetType(A, MATMPIDENSE));
723     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
724 
725     /* Copy the matrix ... This isn't the most efficient means,
726        but it's quick for now */
727     A->insertmode = INSERT_VALUES;
728 
729     row = mat->rmap->rstart;
730     m   = mdn->A->rmap->n;
731     for (i = 0; i < m; i++) {
732       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735       row++;
736     }
737 
738     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741     if (rank == 0) {
742       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744     }
745     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746     PetscCall(PetscViewerFlush(viewer));
747     PetscCall(MatDestroy(&A));
748   }
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753 {
754   PetscBool iascii, isbinary, isdraw, issocket;
755 
756   PetscFunctionBegin;
757   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
761 
762   if (iascii || issocket || isdraw) {
763     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765   PetscFunctionReturn(PETSC_SUCCESS);
766 }
767 
768 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769 {
770   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
771   Mat            mdn = mat->A;
772   PetscLogDouble isend[5], irecv[5];
773 
774   PetscFunctionBegin;
775   info->block_size = 1.0;
776 
777   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
778 
779   isend[0] = info->nz_used;
780   isend[1] = info->nz_allocated;
781   isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;
783   isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
792 
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   } else if (flag == MAT_GLOBAL_SUM) {
799     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
800 
801     info->nz_used      = irecv[0];
802     info->nz_allocated = irecv[1];
803     info->nz_unneeded  = irecv[2];
804     info->memory       = irecv[3];
805     info->mallocs      = irecv[4];
806   }
807   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
808   info->fill_ratio_needed = 0;
809   info->factor_mallocs    = 0;
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
813 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814 {
815   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     MatCheckPreallocated(A, 1);
823     PetscCall(MatSetOption(a->A, op, flg));
824     break;
825   case MAT_ROW_ORIENTED:
826     MatCheckPreallocated(A, 1);
827     a->roworiented = flg;
828     PetscCall(MatSetOption(a->A, op, flg));
829     break;
830   case MAT_FORCE_DIAGONAL_ENTRIES:
831   case MAT_KEEP_NONZERO_PATTERN:
832   case MAT_USE_HASH_TABLE:
833   case MAT_SORTED_FULL:
834     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835     break;
836   case MAT_IGNORE_OFF_PROC_ENTRIES:
837     a->donotstash = flg;
838     break;
839   case MAT_SYMMETRIC:
840   case MAT_STRUCTURALLY_SYMMETRIC:
841   case MAT_HERMITIAN:
842   case MAT_SYMMETRY_ETERNAL:
843   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844   case MAT_SPD:
845   case MAT_IGNORE_LOWER_TRIANGULAR:
846   case MAT_IGNORE_ZERO_ENTRIES:
847   case MAT_SPD_ETERNAL:
848     /* if the diagonal matrix is square it inherits some of the properties above */
849     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850     break;
851   default:
852     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853   }
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858 {
859   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
860   const PetscScalar *l;
861   PetscScalar        x, *v, *vv, *r;
862   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
863 
864   PetscFunctionBegin;
865   PetscCall(MatDenseGetArray(mdn->A, &vv));
866   PetscCall(MatDenseGetLDA(mdn->A, &lda));
867   PetscCall(MatGetLocalSize(A, &s2, &s3));
868   if (ll) {
869     PetscCall(VecGetLocalSize(ll, &s2a));
870     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871     PetscCall(VecGetArrayRead(ll, &l));
872     for (i = 0; i < m; i++) {
873       x = l[i];
874       v = vv + i;
875       for (j = 0; j < n; j++) {
876         (*v) *= x;
877         v += lda;
878       }
879     }
880     PetscCall(VecRestoreArrayRead(ll, &l));
881     PetscCall(PetscLogFlops(1.0 * n * m));
882   }
883   if (rr) {
884     const PetscScalar *ar;
885 
886     PetscCall(VecGetLocalSize(rr, &s3a));
887     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
888     PetscCall(VecGetArrayRead(rr, &ar));
889     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890     PetscCall(VecGetArray(mdn->lvec, &r));
891     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893     PetscCall(VecRestoreArrayRead(rr, &ar));
894     for (i = 0; i < n; i++) {
895       x = r[i];
896       v = vv + i * lda;
897       for (j = 0; j < m; j++) (*v++) *= x;
898     }
899     PetscCall(VecRestoreArray(mdn->lvec, &r));
900     PetscCall(PetscLogFlops(1.0 * n * m));
901   }
902   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903   PetscFunctionReturn(PETSC_SUCCESS);
904 }
905 
906 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907 {
908   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
909   PetscInt           i, j;
910   PetscMPIInt        size;
911   PetscReal          sum = 0.0;
912   const PetscScalar *av, *v;
913 
914   PetscFunctionBegin;
915   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916   v = av;
917   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918   if (size == 1) {
919     PetscCall(MatNorm(mdn->A, type, nrm));
920   } else {
921     if (type == NORM_FROBENIUS) {
922       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923         sum += PetscRealPart(PetscConj(*v) * (*v));
924         v++;
925       }
926       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927       *nrm = PetscSqrtReal(*nrm);
928       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929     } else if (type == NORM_1) {
930       PetscReal *tmp, *tmp2;
931       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932       *nrm = 0.0;
933       v    = av;
934       for (j = 0; j < mdn->A->cmap->n; j++) {
935         for (i = 0; i < mdn->A->rmap->n; i++) {
936           tmp[j] += PetscAbsScalar(*v);
937           v++;
938         }
939       }
940       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941       for (j = 0; j < A->cmap->N; j++) {
942         if (tmp2[j] > *nrm) *nrm = tmp2[j];
943       }
944       PetscCall(PetscFree2(tmp, tmp2));
945       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946     } else if (type == NORM_INFINITY) { /* max row norm */
947       PetscReal ntemp;
948       PetscCall(MatNorm(mdn->A, type, &ntemp));
949       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951   }
952   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953   PetscFunctionReturn(PETSC_SUCCESS);
954 }
955 
956 PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957 {
958   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959   Mat           B;
960   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961   PetscInt      j, i, lda;
962   PetscScalar  *v;
963 
964   PetscFunctionBegin;
965   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971   } else B = *matout;
972 
973   m = a->A->rmap->n;
974   n = a->A->cmap->n;
975   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976   PetscCall(MatDenseGetLDA(a->A, &lda));
977   PetscCall(PetscMalloc1(m, &rwork));
978   for (i = 0; i < m; i++) rwork[i] = rstart + i;
979   for (j = 0; j < n; j++) {
980     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981     v += lda;
982   }
983   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984   PetscCall(PetscFree(rwork));
985   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988     *matout = B;
989   } else {
990     PetscCall(MatHeaderMerge(A, &B));
991   }
992   PetscFunctionReturn(PETSC_SUCCESS);
993 }
994 
995 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
997 
998 PetscErrorCode MatSetUp_MPIDense(Mat A)
999 {
1000   PetscFunctionBegin;
1001   PetscCall(PetscLayoutSetUp(A->rmap));
1002   PetscCall(PetscLayoutSetUp(A->cmap));
1003   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008 {
1009   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1010 
1011   PetscFunctionBegin;
1012   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017 {
1018   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1019 
1020   PetscFunctionBegin;
1021   PetscCall(MatConjugate(a->A));
1022   PetscFunctionReturn(PETSC_SUCCESS);
1023 }
1024 
1025 PetscErrorCode MatRealPart_MPIDense(Mat A)
1026 {
1027   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(MatRealPart(a->A));
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033 
1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035 {
1036   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(MatImaginaryPart(a->A));
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042 
1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044 {
1045   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1046 
1047   PetscFunctionBegin;
1048   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051   PetscFunctionReturn(PETSC_SUCCESS);
1052 }
1053 
1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1055 
1056 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057 {
1058   PetscInt      i, m, n;
1059   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060   PetscReal    *work;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatGetSize(A, &m, &n));
1064   PetscCall(PetscMalloc1(n, &work));
1065   if (type == REDUCTION_MEAN_REALPART) {
1066     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069   } else {
1070     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071   }
1072   if (type == NORM_2) {
1073     for (i = 0; i < n; i++) work[i] *= work[i];
1074   }
1075   if (type == NORM_INFINITY) {
1076     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077   } else {
1078     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079   }
1080   PetscCall(PetscFree(work));
1081   if (type == NORM_2) {
1082     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084     for (i = 0; i < n; i++) reductions[i] /= m;
1085   }
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1090 {
1091   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatSetRandom(d->A, rctx));
1095 #if defined(PETSC_HAVE_DEVICE)
1096   x->offloadmask = d->A->offloadmask;
1097 #endif
1098   PetscFunctionReturn(PETSC_SUCCESS);
1099 }
1100 
1101 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1102 {
1103   PetscFunctionBegin;
1104   *missing = PETSC_FALSE;
1105   PetscFunctionReturn(PETSC_SUCCESS);
1106 }
1107 
1108 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1109 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1110 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1111 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1112 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1113 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1114 
1115 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1116                                        MatGetRow_MPIDense,
1117                                        MatRestoreRow_MPIDense,
1118                                        MatMult_MPIDense,
1119                                        /*  4*/ MatMultAdd_MPIDense,
1120                                        MatMultTranspose_MPIDense,
1121                                        MatMultTransposeAdd_MPIDense,
1122                                        NULL,
1123                                        NULL,
1124                                        NULL,
1125                                        /* 10*/ NULL,
1126                                        NULL,
1127                                        NULL,
1128                                        NULL,
1129                                        MatTranspose_MPIDense,
1130                                        /* 15*/ MatGetInfo_MPIDense,
1131                                        MatEqual_MPIDense,
1132                                        MatGetDiagonal_MPIDense,
1133                                        MatDiagonalScale_MPIDense,
1134                                        MatNorm_MPIDense,
1135                                        /* 20*/ MatAssemblyBegin_MPIDense,
1136                                        MatAssemblyEnd_MPIDense,
1137                                        MatSetOption_MPIDense,
1138                                        MatZeroEntries_MPIDense,
1139                                        /* 24*/ MatZeroRows_MPIDense,
1140                                        NULL,
1141                                        NULL,
1142                                        NULL,
1143                                        NULL,
1144                                        /* 29*/ MatSetUp_MPIDense,
1145                                        NULL,
1146                                        NULL,
1147                                        MatGetDiagonalBlock_MPIDense,
1148                                        NULL,
1149                                        /* 34*/ MatDuplicate_MPIDense,
1150                                        NULL,
1151                                        NULL,
1152                                        NULL,
1153                                        NULL,
1154                                        /* 39*/ MatAXPY_MPIDense,
1155                                        MatCreateSubMatrices_MPIDense,
1156                                        NULL,
1157                                        MatGetValues_MPIDense,
1158                                        MatCopy_MPIDense,
1159                                        /* 44*/ NULL,
1160                                        MatScale_MPIDense,
1161                                        MatShift_MPIDense,
1162                                        NULL,
1163                                        NULL,
1164                                        /* 49*/ MatSetRandom_MPIDense,
1165                                        NULL,
1166                                        NULL,
1167                                        NULL,
1168                                        NULL,
1169                                        /* 54*/ NULL,
1170                                        NULL,
1171                                        NULL,
1172                                        NULL,
1173                                        NULL,
1174                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1175                                        MatDestroy_MPIDense,
1176                                        MatView_MPIDense,
1177                                        NULL,
1178                                        NULL,
1179                                        /* 64*/ NULL,
1180                                        NULL,
1181                                        NULL,
1182                                        NULL,
1183                                        NULL,
1184                                        /* 69*/ NULL,
1185                                        NULL,
1186                                        NULL,
1187                                        NULL,
1188                                        NULL,
1189                                        /* 74*/ NULL,
1190                                        NULL,
1191                                        NULL,
1192                                        NULL,
1193                                        NULL,
1194                                        /* 79*/ NULL,
1195                                        NULL,
1196                                        NULL,
1197                                        NULL,
1198                                        /* 83*/ MatLoad_MPIDense,
1199                                        NULL,
1200                                        NULL,
1201                                        NULL,
1202                                        NULL,
1203                                        NULL,
1204                                        /* 89*/ NULL,
1205                                        NULL,
1206                                        NULL,
1207                                        NULL,
1208                                        NULL,
1209                                        /* 94*/ NULL,
1210                                        NULL,
1211                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1212                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1213                                        NULL,
1214                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1215                                        NULL,
1216                                        NULL,
1217                                        MatConjugate_MPIDense,
1218                                        NULL,
1219                                        /*104*/ NULL,
1220                                        MatRealPart_MPIDense,
1221                                        MatImaginaryPart_MPIDense,
1222                                        NULL,
1223                                        NULL,
1224                                        /*109*/ NULL,
1225                                        NULL,
1226                                        NULL,
1227                                        MatGetColumnVector_MPIDense,
1228                                        MatMissingDiagonal_MPIDense,
1229                                        /*114*/ NULL,
1230                                        NULL,
1231                                        NULL,
1232                                        NULL,
1233                                        NULL,
1234                                        /*119*/ NULL,
1235                                        NULL,
1236                                        NULL,
1237                                        NULL,
1238                                        NULL,
1239                                        /*124*/ NULL,
1240                                        MatGetColumnReductions_MPIDense,
1241                                        NULL,
1242                                        NULL,
1243                                        NULL,
1244                                        /*129*/ NULL,
1245                                        NULL,
1246                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1247                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1248                                        NULL,
1249                                        /*134*/ NULL,
1250                                        NULL,
1251                                        NULL,
1252                                        NULL,
1253                                        NULL,
1254                                        /*139*/ NULL,
1255                                        NULL,
1256                                        NULL,
1257                                        NULL,
1258                                        NULL,
1259                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1260                                        /*145*/ NULL,
1261                                        NULL,
1262                                        NULL,
1263                                        NULL,
1264                                        NULL,
1265                                        /*150*/ NULL,
1266                                        NULL};
1267 
1268 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1269 {
1270   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1271   MatType       mtype = MATSEQDENSE;
1272 
1273   PetscFunctionBegin;
1274   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1275   PetscCall(PetscLayoutSetUp(mat->rmap));
1276   PetscCall(PetscLayoutSetUp(mat->cmap));
1277   if (!a->A) {
1278     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1279     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1280   }
1281 #if defined(PETSC_HAVE_CUDA)
1282   PetscBool iscuda;
1283   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1284   if (iscuda) mtype = MATSEQDENSECUDA;
1285 #endif
1286 #if defined(PETSC_HAVE_HIP)
1287   PetscBool iship;
1288   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1289   if (iship) mtype = MATSEQDENSEHIP;
1290 #endif
1291   PetscCall(MatSetType(a->A, mtype));
1292   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1293 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1294   mat->offloadmask = a->A->offloadmask;
1295 #endif
1296   mat->preallocated = PETSC_TRUE;
1297   mat->assembled    = PETSC_TRUE;
1298   PetscFunctionReturn(PETSC_SUCCESS);
1299 }
1300 
1301 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1302 {
1303   Mat B, C;
1304 
1305   PetscFunctionBegin;
1306   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1307   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1308   PetscCall(MatDestroy(&C));
1309   if (reuse == MAT_REUSE_MATRIX) {
1310     C = *newmat;
1311   } else C = NULL;
1312   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1313   PetscCall(MatDestroy(&B));
1314   if (reuse == MAT_INPLACE_MATRIX) {
1315     PetscCall(MatHeaderReplace(A, &C));
1316   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1317   PetscFunctionReturn(PETSC_SUCCESS);
1318 }
1319 
1320 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1321 {
1322   Mat B, C;
1323 
1324   PetscFunctionBegin;
1325   PetscCall(MatDenseGetLocalMatrix(A, &C));
1326   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1327   if (reuse == MAT_REUSE_MATRIX) {
1328     C = *newmat;
1329   } else C = NULL;
1330   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1331   PetscCall(MatDestroy(&B));
1332   if (reuse == MAT_INPLACE_MATRIX) {
1333     PetscCall(MatHeaderReplace(A, &C));
1334   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1335   PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337 
1338 #if defined(PETSC_HAVE_ELEMENTAL)
1339 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1340 {
1341   Mat          mat_elemental;
1342   PetscScalar *v;
1343   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1344 
1345   PetscFunctionBegin;
1346   if (reuse == MAT_REUSE_MATRIX) {
1347     mat_elemental = *newmat;
1348     PetscCall(MatZeroEntries(*newmat));
1349   } else {
1350     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1351     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1352     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1353     PetscCall(MatSetUp(mat_elemental));
1354     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1355   }
1356 
1357   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1358   for (i = 0; i < N; i++) cols[i] = i;
1359   for (i = 0; i < m; i++) rows[i] = rstart + i;
1360 
1361   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1362   PetscCall(MatDenseGetArray(A, &v));
1363   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1364   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1365   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1366   PetscCall(MatDenseRestoreArray(A, &v));
1367   PetscCall(PetscFree2(rows, cols));
1368 
1369   if (reuse == MAT_INPLACE_MATRIX) {
1370     PetscCall(MatHeaderReplace(A, &mat_elemental));
1371   } else {
1372     *newmat = mat_elemental;
1373   }
1374   PetscFunctionReturn(PETSC_SUCCESS);
1375 }
1376 #endif
1377 
1378 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1379 {
1380   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1381 
1382   PetscFunctionBegin;
1383   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1388 {
1389   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
1396 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1397 {
1398   Mat_MPIDense *mat;
1399   PetscInt      m, nloc, N;
1400 
1401   PetscFunctionBegin;
1402   PetscCall(MatGetSize(inmat, &m, &N));
1403   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1404   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1405     PetscInt sum;
1406 
1407     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1408     /* Check sum(n) = N */
1409     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1410     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1411 
1412     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1413     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1414   }
1415 
1416   /* numeric phase */
1417   mat = (Mat_MPIDense *)(*outmat)->data;
1418   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1419   PetscFunctionReturn(PETSC_SUCCESS);
1420 }
1421 
1422 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1423 {
1424   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1425   PetscInt      lda;
1426 
1427   PetscFunctionBegin;
1428   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1429   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1430   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1431   a->vecinuse = col + 1;
1432   PetscCall(MatDenseGetLDA(a->A, &lda));
1433   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1434   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1435   *v = a->cvec;
1436   PetscFunctionReturn(PETSC_SUCCESS);
1437 }
1438 
1439 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1440 {
1441   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1442 
1443   PetscFunctionBegin;
1444   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1445   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1446   a->vecinuse = 0;
1447   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1448   PetscCall(VecResetArray(a->cvec));
1449   if (v) *v = NULL;
1450   PetscFunctionReturn(PETSC_SUCCESS);
1451 }
1452 
1453 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1454 {
1455   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1456   PetscInt      lda;
1457 
1458   PetscFunctionBegin;
1459   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1460   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1461   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1462   a->vecinuse = col + 1;
1463   PetscCall(MatDenseGetLDA(a->A, &lda));
1464   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1465   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1466   PetscCall(VecLockReadPush(a->cvec));
1467   *v = a->cvec;
1468   PetscFunctionReturn(PETSC_SUCCESS);
1469 }
1470 
1471 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1472 {
1473   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1474 
1475   PetscFunctionBegin;
1476   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1477   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1478   a->vecinuse = 0;
1479   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1480   PetscCall(VecLockReadPop(a->cvec));
1481   PetscCall(VecResetArray(a->cvec));
1482   if (v) *v = NULL;
1483   PetscFunctionReturn(PETSC_SUCCESS);
1484 }
1485 
1486 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1487 {
1488   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1489   PetscInt      lda;
1490 
1491   PetscFunctionBegin;
1492   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1493   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1494   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1495   a->vecinuse = col + 1;
1496   PetscCall(MatDenseGetLDA(a->A, &lda));
1497   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1498   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1499   *v = a->cvec;
1500   PetscFunctionReturn(PETSC_SUCCESS);
1501 }
1502 
1503 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1504 {
1505   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1506 
1507   PetscFunctionBegin;
1508   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1509   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1510   a->vecinuse = 0;
1511   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1512   PetscCall(VecResetArray(a->cvec));
1513   if (v) *v = NULL;
1514   PetscFunctionReturn(PETSC_SUCCESS);
1515 }
1516 
1517 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1518 {
1519   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1520   Mat_MPIDense *c;
1521   MPI_Comm      comm;
1522   PetscInt      pbegin, pend;
1523 
1524   PetscFunctionBegin;
1525   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1526   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1527   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1528   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1529   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1530   if (!a->cmat) {
1531     PetscCall(MatCreate(comm, &a->cmat));
1532     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1533     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1534     else {
1535       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1536       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1537       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1538     }
1539     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1540     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1541   } else {
1542     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1543     if (same && a->cmat->rmap->N != A->rmap->N) {
1544       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1545       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1546     }
1547     if (!same) {
1548       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1549       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1550       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1551       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1552       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1553     }
1554     if (cend - cbegin != a->cmat->cmap->N) {
1555       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1556       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1557       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1558       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1559     }
1560   }
1561   c = (Mat_MPIDense *)a->cmat->data;
1562   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1563   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1564 
1565   a->cmat->preallocated = PETSC_TRUE;
1566   a->cmat->assembled    = PETSC_TRUE;
1567 #if defined(PETSC_HAVE_DEVICE)
1568   a->cmat->offloadmask = c->A->offloadmask;
1569 #endif
1570   a->matinuse = cbegin + 1;
1571   *v          = a->cmat;
1572   PetscFunctionReturn(PETSC_SUCCESS);
1573 }
1574 
1575 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1576 {
1577   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1578   Mat_MPIDense *c;
1579 
1580   PetscFunctionBegin;
1581   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1582   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1583   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1584   a->matinuse = 0;
1585   c           = (Mat_MPIDense *)a->cmat->data;
1586   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1587   if (v) *v = NULL;
1588 #if defined(PETSC_HAVE_DEVICE)
1589   A->offloadmask = a->A->offloadmask;
1590 #endif
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
1593 
1594 /*MC
1595    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1596 
1597    Options Database Key:
1598 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1599 
1600   Level: beginner
1601 
1602 .seealso: [](chapter_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1603 M*/
1604 PetscErrorCode MatCreate_MPIDense(Mat mat)
1605 {
1606   Mat_MPIDense *a;
1607 
1608   PetscFunctionBegin;
1609   PetscCall(PetscNew(&a));
1610   mat->data = (void *)a;
1611   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
1612 
1613   mat->insertmode = NOT_SET_VALUES;
1614 
1615   /* build cache for off array entries formed */
1616   a->donotstash = PETSC_FALSE;
1617 
1618   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1619 
1620   /* stuff used for matrix vector multiply */
1621   a->lvec        = NULL;
1622   a->Mvctx       = NULL;
1623   a->roworiented = PETSC_TRUE;
1624 
1625   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1626   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1627   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1628   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1629   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1630   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1631   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1632   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1633   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1634   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1635   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1636   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1637   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1638   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1639   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1640   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1641   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1642   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1643   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1644   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1645   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1646 #if defined(PETSC_HAVE_ELEMENTAL)
1647   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1648 #endif
1649 #if defined(PETSC_HAVE_SCALAPACK)
1650   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1651 #endif
1652 #if defined(PETSC_HAVE_CUDA)
1653   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1654 #endif
1655   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1656   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1657   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1658 #if defined(PETSC_HAVE_CUDA)
1659   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1660   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1661 #endif
1662 #if defined(PETSC_HAVE_HIP)
1663   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1664   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1666 #endif
1667   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1669   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 /*MC
1674    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1675 
1676    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1677    and `MATMPIDENSE` otherwise.
1678 
1679    Options Database Key:
1680 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1681 
1682   Level: beginner
1683 
1684 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1685 M*/
1686 
1687 /*@C
1688    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1689 
1690    Collective
1691 
1692    Input Parameters:
1693 .  B - the matrix
1694 -  data - optional location of matrix data.  Set to `NULL` for PETSc
1695    to control all matrix memory allocation.
1696 
1697    Level: intermediate
1698 
1699    Notes:
1700    The dense format is fully compatible with standard Fortran
1701    storage by columns.
1702 
1703    The data input variable is intended primarily for Fortran programmers
1704    who wish to allocate their own matrix memory space.  Most users should
1705    set `data` to `NULL`.
1706 
1707 .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1708 @*/
1709 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1710 {
1711   PetscFunctionBegin;
1712   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1713   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 /*@
1718    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1719    array provided by the user. This is useful to avoid copying an array
1720    into a matrix
1721 
1722    Not Collective
1723 
1724    Input Parameters:
1725 +  mat - the matrix
1726 -  array - the array in column major order
1727 
1728    Level: developer
1729 
1730    Note:
1731    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1732    freed when the matrix is destroyed.
1733 
1734 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1735           `MatDenseReplaceArray()`
1736 @*/
1737 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1738 {
1739   PetscFunctionBegin;
1740   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1741   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1742   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1743 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1744   mat->offloadmask = PETSC_OFFLOAD_CPU;
1745 #endif
1746   PetscFunctionReturn(PETSC_SUCCESS);
1747 }
1748 
1749 /*@
1750    MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1751 
1752    Not Collective
1753 
1754    Input Parameters:
1755 .  mat - the matrix
1756 
1757    Level: developer
1758 
1759    Note:
1760    You can only call this after a call to `MatDensePlaceArray()`
1761 
1762 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1763 @*/
1764 PetscErrorCode MatDenseResetArray(Mat mat)
1765 {
1766   PetscFunctionBegin;
1767   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1768   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1769   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 /*@
1774    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1775    array provided by the user. This is useful to avoid copying an array
1776    into a matrix
1777 
1778    Not Collective
1779 
1780    Input Parameters:
1781 +  mat - the matrix
1782 -  array - the array in column major order
1783 
1784    Level: developer
1785 
1786    Note:
1787    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1788    freed by the user. It will be freed when the matrix is destroyed.
1789 
1790 .seealso: [](chapter_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1791 @*/
1792 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1793 {
1794   PetscFunctionBegin;
1795   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1796   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1797   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1798 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1799   mat->offloadmask = PETSC_OFFLOAD_CPU;
1800 #endif
1801   PetscFunctionReturn(PETSC_SUCCESS);
1802 }
1803 
1804 /*@C
1805    MatCreateDense - Creates a matrix in `MATDENSE` format.
1806 
1807    Collective
1808 
1809    Input Parameters:
1810 +  comm - MPI communicator
1811 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1812 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1813 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1814 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1815 -  data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1816    to control all matrix memory allocation.
1817 
1818    Output Parameter:
1819 .  A - the matrix
1820 
1821    Level: intermediate
1822 
1823    Notes:
1824    The dense format is fully compatible with standard Fortran
1825    storage by columns.
1826 
1827    Although local portions of the matrix are stored in column-major
1828    order, the matrix is partitioned across MPI ranks by row.
1829 
1830    The data input variable is intended primarily for Fortran programmers
1831    who wish to allocate their own matrix memory space.  Most users should
1832    set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1833 
1834    The user MUST specify either the local or global matrix dimensions
1835    (possibly both).
1836 
1837 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1838 @*/
1839 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1840 {
1841   PetscFunctionBegin;
1842   PetscCall(MatCreate(comm, A));
1843   PetscCall(MatSetSizes(*A, m, n, M, N));
1844   PetscCall(MatSetType(*A, MATDENSE));
1845   PetscCall(MatSeqDenseSetPreallocation(*A, data));
1846   PetscCall(MatMPIDenseSetPreallocation(*A, data));
1847   PetscFunctionReturn(PETSC_SUCCESS);
1848 }
1849 
1850 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1851 {
1852   Mat           mat;
1853   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1854 
1855   PetscFunctionBegin;
1856   *newmat = NULL;
1857   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1858   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1859   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1860   a = (Mat_MPIDense *)mat->data;
1861 
1862   mat->factortype   = A->factortype;
1863   mat->assembled    = PETSC_TRUE;
1864   mat->preallocated = PETSC_TRUE;
1865 
1866   mat->insertmode = NOT_SET_VALUES;
1867   a->donotstash   = oldmat->donotstash;
1868 
1869   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1870   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1871 
1872   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1873 
1874   *newmat = mat;
1875   PetscFunctionReturn(PETSC_SUCCESS);
1876 }
1877 
1878 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1879 {
1880   PetscBool isbinary;
1881 #if defined(PETSC_HAVE_HDF5)
1882   PetscBool ishdf5;
1883 #endif
1884 
1885   PetscFunctionBegin;
1886   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
1887   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1888   /* force binary viewer to load .info file if it has not yet done so */
1889   PetscCall(PetscViewerSetUp(viewer));
1890   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1891 #if defined(PETSC_HAVE_HDF5)
1892   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1893 #endif
1894   if (isbinary) {
1895     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1896 #if defined(PETSC_HAVE_HDF5)
1897   } else if (ishdf5) {
1898     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
1899 #endif
1900   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1901   PetscFunctionReturn(PETSC_SUCCESS);
1902 }
1903 
1904 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1905 {
1906   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1907   Mat           a, b;
1908 
1909   PetscFunctionBegin;
1910   a = matA->A;
1911   b = matB->A;
1912   PetscCall(MatEqual(a, b, flag));
1913   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1918 {
1919   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1920 
1921   PetscFunctionBegin;
1922   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1923   PetscCall(MatDestroy(&atb->atb));
1924   PetscCall(PetscFree(atb));
1925   PetscFunctionReturn(PETSC_SUCCESS);
1926 }
1927 
1928 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1929 {
1930   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1931 
1932   PetscFunctionBegin;
1933   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1934   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1935   PetscCall(PetscFree(abt));
1936   PetscFunctionReturn(PETSC_SUCCESS);
1937 }
1938 
1939 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1940 {
1941   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
1942   Mat_TransMatMultDense *atb;
1943   MPI_Comm               comm;
1944   PetscMPIInt            size, *recvcounts;
1945   PetscScalar           *carray, *sendbuf;
1946   const PetscScalar     *atbarray;
1947   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
1948   const PetscInt        *ranges;
1949 
1950   PetscFunctionBegin;
1951   MatCheckProduct(C, 3);
1952   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
1953   atb        = (Mat_TransMatMultDense *)C->product->data;
1954   recvcounts = atb->recvcounts;
1955   sendbuf    = atb->sendbuf;
1956 
1957   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1958   PetscCallMPI(MPI_Comm_size(comm, &size));
1959 
1960   /* compute atbarray = aseq^T * bseq */
1961   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
1962 
1963   PetscCall(MatGetOwnershipRanges(C, &ranges));
1964 
1965   /* arrange atbarray into sendbuf */
1966   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
1967   PetscCall(MatDenseGetLDA(atb->atb, &lda));
1968   for (proc = 0, k = 0; proc < size; proc++) {
1969     for (j = 0; j < cN; j++) {
1970       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
1971     }
1972   }
1973   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
1974 
1975   /* sum all atbarray to local values of C */
1976   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
1977   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
1978   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
1979   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1980   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1985 {
1986   MPI_Comm               comm;
1987   PetscMPIInt            size;
1988   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
1989   Mat_TransMatMultDense *atb;
1990   PetscBool              cisdense = PETSC_FALSE;
1991   PetscInt               i;
1992   const PetscInt        *ranges;
1993 
1994   PetscFunctionBegin;
1995   MatCheckProduct(C, 4);
1996   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
1997   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1998   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1999     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2000   }
2001 
2002   /* create matrix product C */
2003   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2004 #if defined(PETSC_HAVE_CUDA)
2005   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2006 #endif
2007 #if defined(PETSC_HAVE_HIP)
2008   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2009 #endif
2010   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2011   PetscCall(MatSetUp(C));
2012 
2013   /* create data structure for reuse C */
2014   PetscCallMPI(MPI_Comm_size(comm, &size));
2015   PetscCall(PetscNew(&atb));
2016   cM = C->rmap->N;
2017   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2018   PetscCall(MatGetOwnershipRanges(C, &ranges));
2019   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2020 
2021   C->product->data    = atb;
2022   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2023   PetscFunctionReturn(PETSC_SUCCESS);
2024 }
2025 
2026 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2027 {
2028   MPI_Comm               comm;
2029   PetscMPIInt            i, size;
2030   PetscInt               maxRows, bufsiz;
2031   PetscMPIInt            tag;
2032   PetscInt               alg;
2033   Mat_MatTransMultDense *abt;
2034   Mat_Product           *product = C->product;
2035   PetscBool              flg;
2036 
2037   PetscFunctionBegin;
2038   MatCheckProduct(C, 4);
2039   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2040   /* check local size of A and B */
2041   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2042 
2043   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2044   alg = flg ? 0 : 1;
2045 
2046   /* setup matrix product C */
2047   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2048   PetscCall(MatSetType(C, MATMPIDENSE));
2049   PetscCall(MatSetUp(C));
2050   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2051 
2052   /* create data structure for reuse C */
2053   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2054   PetscCallMPI(MPI_Comm_size(comm, &size));
2055   PetscCall(PetscNew(&abt));
2056   abt->tag = tag;
2057   abt->alg = alg;
2058   switch (alg) {
2059   case 1: /* alg: "cyclic" */
2060     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2061     bufsiz = A->cmap->N * maxRows;
2062     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
2063     break;
2064   default: /* alg: "allgatherv" */
2065     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2066     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
2067     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2068     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2069     break;
2070   }
2071 
2072   C->product->data                = abt;
2073   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2074   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2075   PetscFunctionReturn(PETSC_SUCCESS);
2076 }
2077 
2078 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2079 {
2080   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2081   Mat_MatTransMultDense *abt;
2082   MPI_Comm               comm;
2083   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2084   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
2085   PetscInt               i, cK             = A->cmap->N, k, j, bn;
2086   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2087   const PetscScalar     *av, *bv;
2088   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
2089   MPI_Request            reqs[2];
2090   const PetscInt        *ranges;
2091 
2092   PetscFunctionBegin;
2093   MatCheckProduct(C, 3);
2094   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2095   abt = (Mat_MatTransMultDense *)C->product->data;
2096   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2097   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2098   PetscCallMPI(MPI_Comm_size(comm, &size));
2099   PetscCall(MatDenseGetArrayRead(a->A, &av));
2100   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2101   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2102   PetscCall(MatDenseGetLDA(a->A, &i));
2103   PetscCall(PetscBLASIntCast(i, &alda));
2104   PetscCall(MatDenseGetLDA(b->A, &i));
2105   PetscCall(PetscBLASIntCast(i, &blda));
2106   PetscCall(MatDenseGetLDA(c->A, &i));
2107   PetscCall(PetscBLASIntCast(i, &clda));
2108   PetscCall(MatGetOwnershipRanges(B, &ranges));
2109   bn = B->rmap->n;
2110   if (blda == bn) {
2111     sendbuf = (PetscScalar *)bv;
2112   } else {
2113     sendbuf = abt->buf[0];
2114     for (k = 0, i = 0; i < cK; i++) {
2115       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2116     }
2117   }
2118   if (size > 1) {
2119     sendto   = (rank + size - 1) % size;
2120     recvfrom = (rank + size + 1) % size;
2121   } else {
2122     sendto = recvfrom = 0;
2123   }
2124   PetscCall(PetscBLASIntCast(cK, &ck));
2125   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2126   recvisfrom = rank;
2127   for (i = 0; i < size; i++) {
2128     /* we have finished receiving in sending, bufs can be read/modified */
2129     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2130     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2131 
2132     if (nextrecvisfrom != rank) {
2133       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2134       sendsiz = cK * bn;
2135       recvsiz = cK * nextbn;
2136       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2137       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2138       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2139     }
2140 
2141     /* local aseq * sendbuf^T */
2142     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2143     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2144 
2145     if (nextrecvisfrom != rank) {
2146       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2147       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2148     }
2149     bn         = nextbn;
2150     recvisfrom = nextrecvisfrom;
2151     sendbuf    = recvbuf;
2152   }
2153   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2154   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2155   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2156   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2157   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2158   PetscFunctionReturn(PETSC_SUCCESS);
2159 }
2160 
2161 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2162 {
2163   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2164   Mat_MatTransMultDense *abt;
2165   MPI_Comm               comm;
2166   PetscMPIInt            size;
2167   PetscScalar           *cv, *sendbuf, *recvbuf;
2168   const PetscScalar     *av, *bv;
2169   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2170   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2171   PetscBLASInt           cm, cn, ck, alda, clda;
2172 
2173   PetscFunctionBegin;
2174   MatCheckProduct(C, 3);
2175   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2176   abt = (Mat_MatTransMultDense *)C->product->data;
2177   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2178   PetscCallMPI(MPI_Comm_size(comm, &size));
2179   PetscCall(MatDenseGetArrayRead(a->A, &av));
2180   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2181   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2182   PetscCall(MatDenseGetLDA(a->A, &i));
2183   PetscCall(PetscBLASIntCast(i, &alda));
2184   PetscCall(MatDenseGetLDA(b->A, &blda));
2185   PetscCall(MatDenseGetLDA(c->A, &i));
2186   PetscCall(PetscBLASIntCast(i, &clda));
2187   /* copy transpose of B into buf[0] */
2188   bn      = B->rmap->n;
2189   sendbuf = abt->buf[0];
2190   recvbuf = abt->buf[1];
2191   for (k = 0, j = 0; j < bn; j++) {
2192     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2193   }
2194   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2195   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2196   PetscCall(PetscBLASIntCast(cK, &ck));
2197   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2198   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2199   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2200   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2201   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2202   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2203   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2204   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2205   PetscFunctionReturn(PETSC_SUCCESS);
2206 }
2207 
2208 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2209 {
2210   Mat_MatTransMultDense *abt;
2211 
2212   PetscFunctionBegin;
2213   MatCheckProduct(C, 3);
2214   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2215   abt = (Mat_MatTransMultDense *)C->product->data;
2216   switch (abt->alg) {
2217   case 1:
2218     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2219     break;
2220   default:
2221     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2222     break;
2223   }
2224   PetscFunctionReturn(PETSC_SUCCESS);
2225 }
2226 
2227 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2228 {
2229   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2230 
2231   PetscFunctionBegin;
2232   PetscCall(MatDestroy(&ab->Ce));
2233   PetscCall(MatDestroy(&ab->Ae));
2234   PetscCall(MatDestroy(&ab->Be));
2235   PetscCall(PetscFree(ab));
2236   PetscFunctionReturn(PETSC_SUCCESS);
2237 }
2238 
2239 #if defined(PETSC_HAVE_ELEMENTAL)
2240 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2241 {
2242   Mat_MatMultDense *ab;
2243 
2244   PetscFunctionBegin;
2245   MatCheckProduct(C, 3);
2246   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2247   ab = (Mat_MatMultDense *)C->product->data;
2248   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2249   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2250   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2251   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2252   PetscFunctionReturn(PETSC_SUCCESS);
2253 }
2254 
2255 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2256 {
2257   Mat               Ae, Be, Ce;
2258   Mat_MatMultDense *ab;
2259 
2260   PetscFunctionBegin;
2261   MatCheckProduct(C, 4);
2262   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2263   /* check local size of A and B */
2264   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2265     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2266   }
2267 
2268   /* create elemental matrices Ae and Be */
2269   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2270   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2271   PetscCall(MatSetType(Ae, MATELEMENTAL));
2272   PetscCall(MatSetUp(Ae));
2273   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2274 
2275   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2276   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2277   PetscCall(MatSetType(Be, MATELEMENTAL));
2278   PetscCall(MatSetUp(Be));
2279   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2280 
2281   /* compute symbolic Ce = Ae*Be */
2282   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2283   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2284 
2285   /* setup C */
2286   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2287   PetscCall(MatSetType(C, MATDENSE));
2288   PetscCall(MatSetUp(C));
2289 
2290   /* create data structure for reuse Cdense */
2291   PetscCall(PetscNew(&ab));
2292   ab->Ae = Ae;
2293   ab->Be = Be;
2294   ab->Ce = Ce;
2295 
2296   C->product->data       = ab;
2297   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2298   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2299   PetscFunctionReturn(PETSC_SUCCESS);
2300 }
2301 #endif
2302 
2303 #if defined(PETSC_HAVE_ELEMENTAL)
2304 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2305 {
2306   PetscFunctionBegin;
2307   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2308   C->ops->productsymbolic = MatProductSymbolic_AB;
2309   PetscFunctionReturn(PETSC_SUCCESS);
2310 }
2311 #endif
2312 
2313 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2314 {
2315   Mat_Product *product = C->product;
2316   Mat          A = product->A, B = product->B;
2317 
2318   PetscFunctionBegin;
2319   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2320     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2321   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2322   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2323   PetscFunctionReturn(PETSC_SUCCESS);
2324 }
2325 
2326 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2327 {
2328   Mat_Product *product     = C->product;
2329   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2330   PetscInt     alg, nalg = 2;
2331   PetscBool    flg = PETSC_FALSE;
2332 
2333   PetscFunctionBegin;
2334   /* Set default algorithm */
2335   alg = 0; /* default is allgatherv */
2336   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2337   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2338 
2339   /* Get runtime option */
2340   if (product->api_user) {
2341     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2342     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2343     PetscOptionsEnd();
2344   } else {
2345     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2346     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2347     PetscOptionsEnd();
2348   }
2349   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2350 
2351   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2352   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2353   PetscFunctionReturn(PETSC_SUCCESS);
2354 }
2355 
2356 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2357 {
2358   Mat_Product *product = C->product;
2359 
2360   PetscFunctionBegin;
2361   switch (product->type) {
2362 #if defined(PETSC_HAVE_ELEMENTAL)
2363   case MATPRODUCT_AB:
2364     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2365     break;
2366 #endif
2367   case MATPRODUCT_AtB:
2368     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2369     break;
2370   case MATPRODUCT_ABt:
2371     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2372     break;
2373   default:
2374     break;
2375   }
2376   PetscFunctionReturn(PETSC_SUCCESS);
2377 }
2378