xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 623b4cf32a4d9ee5e9da0c76cfd89db17cbd9c1f)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13   matrix that represents the operator. For sequential matrices it returns itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
/*
  Return (in *a) a sequential matrix that aliases this process's diagonal block of A.

  The block is created once, cached on A under the key "DiagonalBlock", and shares storage
  with the local dense array (no copy). The caller receives a borrowed reference: the cache
  composition below holds the only counted reference, released when A is destroyed.
*/
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* Diagonal block only makes sense when row and column layouts match */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* Alias the diagonal block: columns rstart..rstart+m-1 of the local array.
       NOTE(review): the offset m * rstart assumes the inner matrix's leading dimension
       equals m (the default); confirm behavior when MatDenseSetLDA() was used. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* The composition above now owns the reference; drop ours so *a is borrowed */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
129 
/*
  Insert/add a logically dense m x n block of values.

  Locally owned rows are forwarded to the sequential block; rows owned by other
  processes are buffered in the matrix stash (unless donotstash is set) and
  communicated during assembly. `v` is read row-major when roworiented, otherwise
  column-major (stride m between consecutive columns of a row).
*/
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row i of v is contiguous: one call handles all n columns */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented v: entries of this row are strided by m */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix as needing assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/*
  Set the leading dimension of the local dense storage.

  If the inner sequential matrix has not been created yet (preallocation not done),
  it is created here first — sized local-rows x global-columns — so the LDA can be
  recorded before the array is allocated. The sequential type is chosen to match the
  parallel type (CUDA/HIP variants when the build supports them).
*/
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* inner matrix holds all N columns for the local rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
/*
  Extract the submatrix A(isrow, iscol) into *B.

  Limitations visible in this implementation: the column index set is gathered onto every
  process (each rank stores all selected columns locally, as MPIDENSE does), and the row
  indices on each rank must refer to locally owned rows of A — no parallel redistribution
  is performed. MAT_REUSE_MATRIX reuses *B without size validation (see comment below).
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every rank needs the full list of selected columns, since it stores all columns locally */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* bv is filled sequentially: column-major, nrows entries per selected column.
     NOTE(review): assumes the new local matrix has lda == nrows (fresh preallocation) —
     for MAT_REUSE_MATRIX with a custom LDA this would interleave incorrectly; confirm. */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378 {
379   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380   PetscInt      nstash, reallocs;
381 
382   PetscFunctionBegin;
383   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
384 
385   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
/*
  Finish assembly: drain the stash messages started in MatAssemblyBegin_MPIDense,
  insert the received off-process values locally, then assemble the sequential block.
*/
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 once all incoming stash messages have been consumed */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* finish by assembling the local sequential block */
  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
/*
  Zero the given (global) rows, optionally placing `diag` on their diagonal and
  fixing b so that x remains a solution for the zeroed rows (b[r] = diag * x[r]).
*/
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block first; diagonal handled separately below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this inserts `diag` into EVERY diagonal entry of A, not only the
       zeroed rows — verify this matches the documented MatZeroRows contract, which
       only promises the zeroed rows' diagonals. */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
465 
/*
  yy = A * xx.

  The local block stores all N columns, so the full input vector is gathered into
  mdn->lvec with an SF broadcast before the purely local multiply. The communication
  is memtype-aware so device-resident vectors avoid host staging.
*/
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the SF and lvec lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  /* local dense multiply with the gathered vector */
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
484 
/*
  zz = yy + A * xx.

  Same gather-then-local-compute pattern as MatMult_MPIDense, delegating to the
  sequential block's multadd.
*/
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the SF and lvec lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
503 
/*
  yy = A^T * xx (herm == PETSC_FALSE) or yy = A^H * xx (herm == PETSC_TRUE).

  Transpose of the gather pattern used by MatMult: each rank computes a full-length
  partial result into lvec locally, then the partials are summed into the distributed
  yy with an SF reduce. yy is zeroed first since the reduce adds into it.
*/
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* reduce below accumulates with MPIU_SUM, so start from zero */
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
523 
/*
  zz = yy + A^T * xx (herm == PETSC_FALSE) or zz = yy + A^H * xx (herm == PETSC_TRUE).

  Like MatMultTransposeKernel_MPIDense, but seeds zz with yy so the SF reduce
  accumulates the matrix-vector product on top of it.
*/
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* seed the accumulator with yy; the reduce adds partial products into it */
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
544 
545 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
546 {
547   PetscFunctionBegin;
548   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
549   PetscFunctionReturn(PETSC_SUCCESS);
550 }
551 
552 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
553 {
554   PetscFunctionBegin;
555   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
556   PetscFunctionReturn(PETSC_SUCCESS);
557 }
558 
559 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
560 {
561   PetscFunctionBegin;
562   PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
563   PetscFunctionReturn(PETSC_SUCCESS);
564 }
565 
566 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
567 {
568   PetscFunctionBegin;
569   PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
570   PetscFunctionReturn(PETSC_SUCCESS);
571 }
572 
573 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
574 {
575   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
576   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
577   PetscScalar       *x;
578   const PetscScalar *av;
579 
580   PetscFunctionBegin;
581   PetscCall(VecGetArray(v, &x));
582   PetscCall(VecGetSize(v, &ng));
583   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
584   PetscCall(VecGetLocalSize(v, &nl));
585   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
586   radd = A->rmap->rstart * m;
587   PetscCall(MatDenseGetArrayRead(a->A, &av));
588   PetscCall(MatDenseGetLDA(a->A, &lda));
589   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
590   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
591   PetscCall(PetscArrayzero(x + i, nl - i));
592   PetscCall(VecRestoreArray(v, &x));
593   PetscFunctionReturn(PETSC_SUCCESS);
594 }
595 
/*
  Destroy an MPIDENSE matrix: release the inner sequential block and cached
  communication/view objects, free the data structure, and detach every composed
  method implementation so the object can be re-typed cleanly.
*/
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying while a column-vector or submatrix view is outstanding is a usage error */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the local block and lazily created helpers (mult work vector, SF, column views) */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach all composed method implementations registered at creation/conversion time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA-specific conversions, products, and device array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP-specific conversions, products, and device array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the diagonal block cached by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
690 
691 #include <petscdraw.h>
692 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
693 {
694   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
695   PetscMPIInt       rank;
696   PetscViewerType   vtype;
697   PetscBool         iascii, isdraw;
698   PetscViewer       sviewer;
699   PetscViewerFormat format;
700 
701   PetscFunctionBegin;
702   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
703   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
704   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
705   if (iascii) {
706     PetscCall(PetscViewerGetType(viewer, &vtype));
707     PetscCall(PetscViewerGetFormat(viewer, &format));
708     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
709       MatInfo info;
710       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
711       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
712       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
713                                                    (PetscInt)info.memory));
714       PetscCall(PetscViewerFlush(viewer));
715       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
716       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
717       PetscFunctionReturn(PETSC_SUCCESS);
718     } else if (format == PETSC_VIEWER_ASCII_INFO) {
719       PetscFunctionReturn(PETSC_SUCCESS);
720     }
721   } else if (isdraw) {
722     PetscDraw draw;
723     PetscBool isnull;
724 
725     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
726     PetscCall(PetscDrawIsNull(draw, &isnull));
727     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
728   }
729 
730   {
731     /* assemble the entire matrix onto first processor. */
732     Mat          A;
733     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
734     PetscInt    *cols;
735     PetscScalar *vals;
736 
737     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
738     if (rank == 0) {
739       PetscCall(MatSetSizes(A, M, N, M, N));
740     } else {
741       PetscCall(MatSetSizes(A, 0, 0, M, N));
742     }
743     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
744     PetscCall(MatSetType(A, MATMPIDENSE));
745     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
746 
747     /* Copy the matrix ... This isn't the most efficient means,
748        but it's quick for now */
749     A->insertmode = INSERT_VALUES;
750 
751     row = mat->rmap->rstart;
752     m   = mdn->A->rmap->n;
753     for (i = 0; i < m; i++) {
754       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
755       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
756       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
757       row++;
758     }
759 
760     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
761     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
762     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
763     if (rank == 0) {
764       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
765       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
766     }
767     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
768     PetscCall(MatDestroy(&A));
769   }
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
774 {
775   PetscBool iascii, isbinary, isdraw, issocket;
776 
777   PetscFunctionBegin;
778   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
779   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
780   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
781   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
782 
783   if (iascii || issocket || isdraw) {
784     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
785   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
786   PetscFunctionReturn(PETSC_SUCCESS);
787 }
788 
789 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
790 {
791   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
792   Mat            mdn = mat->A;
793   PetscLogDouble isend[5], irecv[5];
794 
795   PetscFunctionBegin;
796   info->block_size = 1.0;
797 
798   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
799 
800   isend[0] = info->nz_used;
801   isend[1] = info->nz_allocated;
802   isend[2] = info->nz_unneeded;
803   isend[3] = info->memory;
804   isend[4] = info->mallocs;
805   if (flag == MAT_LOCAL) {
806     info->nz_used      = isend[0];
807     info->nz_allocated = isend[1];
808     info->nz_unneeded  = isend[2];
809     info->memory       = isend[3];
810     info->mallocs      = isend[4];
811   } else if (flag == MAT_GLOBAL_MAX) {
812     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
813 
814     info->nz_used      = irecv[0];
815     info->nz_allocated = irecv[1];
816     info->nz_unneeded  = irecv[2];
817     info->memory       = irecv[3];
818     info->mallocs      = irecv[4];
819   } else if (flag == MAT_GLOBAL_SUM) {
820     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
821 
822     info->nz_used      = irecv[0];
823     info->nz_allocated = irecv[1];
824     info->nz_unneeded  = irecv[2];
825     info->memory       = irecv[3];
826     info->mallocs      = irecv[4];
827   }
828   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
829   info->fill_ratio_needed = 0;
830   info->factor_mallocs    = 0;
831   PetscFunctionReturn(PETSC_SUCCESS);
832 }
833 
834 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
835 {
836   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
837 
838   PetscFunctionBegin;
839   switch (op) {
840   case MAT_NEW_NONZERO_LOCATIONS:
841   case MAT_NEW_NONZERO_LOCATION_ERR:
842   case MAT_NEW_NONZERO_ALLOCATION_ERR:
843     MatCheckPreallocated(A, 1);
844     PetscCall(MatSetOption(a->A, op, flg));
845     break;
846   case MAT_ROW_ORIENTED:
847     MatCheckPreallocated(A, 1);
848     a->roworiented = flg;
849     PetscCall(MatSetOption(a->A, op, flg));
850     break;
851   case MAT_FORCE_DIAGONAL_ENTRIES:
852   case MAT_KEEP_NONZERO_PATTERN:
853   case MAT_USE_HASH_TABLE:
854   case MAT_SORTED_FULL:
855     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
856     break;
857   case MAT_IGNORE_OFF_PROC_ENTRIES:
858     a->donotstash = flg;
859     break;
860   case MAT_SYMMETRIC:
861   case MAT_STRUCTURALLY_SYMMETRIC:
862   case MAT_HERMITIAN:
863   case MAT_SYMMETRY_ETERNAL:
864   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
865   case MAT_SPD:
866   case MAT_IGNORE_LOWER_TRIANGULAR:
867   case MAT_IGNORE_ZERO_ENTRIES:
868   case MAT_SPD_ETERNAL:
869     /* if the diagonal matrix is square it inherits some of the properties above */
870     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
871     break;
872   default:
873     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
874   }
875   PetscFunctionReturn(PETSC_SUCCESS);
876 }
877 
/* Computes A = diag(ll) * A * diag(rr) in place; either vector may be NULL.
   The local block is column-major with leading dimension lda. Left scaling is
   purely local (ll matches the row distribution); right scaling first gathers
   the column-distributed rr into the local work vector lvec via the PetscSF,
   because every process stores all columns of its rows. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* row i is multiplied by l[i]; step along the row with stride lda */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the scatter context, then broadcast rr's entries into lvec */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* column i starts at vv + i*lda and is contiguous of length m */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
926 
927 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
928 {
929   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
930   PetscInt           i, j;
931   PetscMPIInt        size;
932   PetscReal          sum = 0.0;
933   const PetscScalar *av, *v;
934 
935   PetscFunctionBegin;
936   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
937   v = av;
938   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
939   if (size == 1) {
940     PetscCall(MatNorm(mdn->A, type, nrm));
941   } else {
942     if (type == NORM_FROBENIUS) {
943       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
944         sum += PetscRealPart(PetscConj(*v) * (*v));
945         v++;
946       }
947       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
948       *nrm = PetscSqrtReal(*nrm);
949       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
950     } else if (type == NORM_1) {
951       PetscReal *tmp, *tmp2;
952       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
953       *nrm = 0.0;
954       v    = av;
955       for (j = 0; j < mdn->A->cmap->n; j++) {
956         for (i = 0; i < mdn->A->rmap->n; i++) {
957           tmp[j] += PetscAbsScalar(*v);
958           v++;
959         }
960       }
961       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
962       for (j = 0; j < A->cmap->N; j++) {
963         if (tmp2[j] > *nrm) *nrm = tmp2[j];
964       }
965       PetscCall(PetscFree2(tmp, tmp2));
966       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
967     } else if (type == NORM_INFINITY) { /* max row norm */
968       PetscReal ntemp;
969       PetscCall(MatNorm(mdn->A, type, &ntemp));
970       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
971     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
972   }
973   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
974   PetscFunctionReturn(PETSC_SUCCESS);
975 }
976 
/* Transposes an MPIDENSE matrix by inserting each local column of A as a
   global row of the result B, letting MatSetValues/MatAssembly carry out the
   needed communication. Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and
   MAT_INPLACE_MATRIX (the last replaces A's contents via MatHeaderMerge). */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create B with the transposed size/layout and the same dense type as A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global indices of the rows owned by this process */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* insert local column j (stride-lda apart) as row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: graft B's data into A's header and destroy the shell of B */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1015 
1016 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1017 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1018 
1019 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1020 {
1021   PetscFunctionBegin;
1022   PetscCall(PetscLayoutSetUp(A->rmap));
1023   PetscCall(PetscLayoutSetUp(A->cmap));
1024   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1025   PetscFunctionReturn(PETSC_SUCCESS);
1026 }
1027 
1028 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1029 {
1030   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1031 
1032   PetscFunctionBegin;
1033   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1038 {
1039   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1040 
1041   PetscFunctionBegin;
1042   PetscCall(MatConjugate(a->A));
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1047 {
1048   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1049 
1050   PetscFunctionBegin;
1051   PetscCall(MatRealPart(a->A));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1056 {
1057   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1058 
1059   PetscFunctionBegin;
1060   PetscCall(MatImaginaryPart(a->A));
1061   PetscFunctionReturn(PETSC_SUCCESS);
1062 }
1063 
1064 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1065 {
1066   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1067 
1068   PetscFunctionBegin;
1069   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1070   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1071   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073 }
1074 
1075 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1076 
1077 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1078 {
1079   PetscInt      i, m, n;
1080   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1081   PetscReal    *work;
1082 
1083   PetscFunctionBegin;
1084   PetscCall(MatGetSize(A, &m, &n));
1085   PetscCall(PetscMalloc1(n, &work));
1086   if (type == REDUCTION_MEAN_REALPART) {
1087     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1088   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1089     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1090   } else {
1091     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1092   }
1093   if (type == NORM_2) {
1094     for (i = 0; i < n; i++) work[i] *= work[i];
1095   }
1096   if (type == NORM_INFINITY) {
1097     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1098   } else {
1099     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1100   }
1101   PetscCall(PetscFree(work));
1102   if (type == NORM_2) {
1103     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1104   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1105     for (i = 0; i < n; i++) reductions[i] /= m;
1106   }
1107   PetscFunctionReturn(PETSC_SUCCESS);
1108 }
1109 
1110 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1111 {
1112   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1113 
1114   PetscFunctionBegin;
1115   PetscCall(MatSetRandom(d->A, rctx));
1116 #if defined(PETSC_HAVE_DEVICE)
1117   x->offloadmask = d->A->offloadmask;
1118 #endif
1119   PetscFunctionReturn(PETSC_SUCCESS);
1120 }
1121 
/* Reports whether a diagonal entry is structurally missing. A dense matrix
   stores every entry explicitly, so the answer is always "no"; the location
   output d is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1128 
1129 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1130 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1131 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1132 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1133 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1134 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1135 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1136 
/* Virtual function table for MATMPIDENSE: maps each Mat operation slot to its
   implementation (NULL = unsupported/unset). The numeric comments give the
   slot index within struct _MatOps; keep entries in slot order. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1289 
/* Allocates (or, when data != NULL, adopts user-provided) storage for the
   local dense block. The block is mat->rmap->n x mat->cmap->N: each process
   stores all columns of its rows. The sequential block's type is matched to
   the parallel type so CUDA/HIP variants get device-resident blocks. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* first call: create the sequential inner matrix holding all columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* mirror the inner block's host/device validity mask on the outer matrix */
  mat->offloadmask = a->A->offloadmask;
#endif
  /* dense storage is fully allocated up front, so the matrix is immediately usable */
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1322 
/* Converts an MPIAIJ matrix to MPIDENSE: densify the merged local rows into a
   sequential dense matrix, then concatenate the per-process pieces into a
   parallel dense matrix. Handles all three MatReuse modes; for
   MAT_INPLACE_MATRIX the result replaces A's contents via MatHeaderReplace. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat; /* reuse the previously created result matrix */
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1341 
/* Converts an MPIDENSE matrix to MPIAIJ: sparsify the local dense block into
   a sequential AIJ matrix, then concatenate the per-process pieces into a
   parallel AIJ matrix. Handles all three MatReuse modes; for
   MAT_INPLACE_MATRIX the result replaces A's contents via MatHeaderReplace.
   Note: C from MatDenseGetLocalMatrix() is a borrowed reference, so it is not
   destroyed here. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat; /* reuse the previously created result matrix */
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1359 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Converts an MPIDENSE matrix to Elemental format by inserting the locally
   owned rows into a new (or reused) MATELEMENTAL matrix with ADD_VALUES,
   which is what the PETSc-Elemental interface requires for off-process
   entries. Handles all three MatReuse modes. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse: clear the previous values since entries are added below */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the values below are supplied in column-major order */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* index arrays: all N global columns, and this process's global rows */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* non-contiguous columns (lda > m): insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1404 
1405 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1406 {
1407   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1408 
1409   PetscFunctionBegin;
1410   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1411   PetscFunctionReturn(PETSC_SUCCESS);
1412 }
1413 
1414 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1415 {
1416   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1417 
1418   PetscFunctionBegin;
1419   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1420   PetscFunctionReturn(PETSC_SUCCESS);
1421 }
1422 
/* Build (or refill) a parallel dense matrix whose local rows are a copy of the
   sequential matrix inmat on each rank; n is the local column count (or
   PETSC_DECIDE). Collective on comm. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    /* all entries set below are local, so assembly needs no communication */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy the sequential values into the local dense block */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1448 
1449 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1450 {
1451   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1452   PetscInt      lda;
1453 
1454   PetscFunctionBegin;
1455   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1456   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1457   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1458   a->vecinuse = col + 1;
1459   PetscCall(MatDenseGetLDA(a->A, &lda));
1460   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1461   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1462   *v = a->cvec;
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1467 {
1468   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1469 
1470   PetscFunctionBegin;
1471   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1472   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1473   a->vecinuse = 0;
1474   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1475   PetscCall(VecResetArray(a->cvec));
1476   if (v) *v = NULL;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
1480 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1481 {
1482   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1483   PetscInt      lda;
1484 
1485   PetscFunctionBegin;
1486   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1487   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1488   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1489   a->vecinuse = col + 1;
1490   PetscCall(MatDenseGetLDA(a->A, &lda));
1491   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1492   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1493   PetscCall(VecLockReadPush(a->cvec));
1494   *v = a->cvec;
1495   PetscFunctionReturn(PETSC_SUCCESS);
1496 }
1497 
1498 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1499 {
1500   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1501 
1502   PetscFunctionBegin;
1503   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1504   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1505   a->vecinuse = 0;
1506   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1507   PetscCall(VecLockReadPop(a->cvec));
1508   PetscCall(VecResetArray(a->cvec));
1509   if (v) *v = NULL;
1510   PetscFunctionReturn(PETSC_SUCCESS);
1511 }
1512 
1513 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1514 {
1515   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1516   PetscInt      lda;
1517 
1518   PetscFunctionBegin;
1519   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1520   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1521   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1522   a->vecinuse = col + 1;
1523   PetscCall(MatDenseGetLDA(a->A, &lda));
1524   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1525   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1526   *v = a->cvec;
1527   PetscFunctionReturn(PETSC_SUCCESS);
1528 }
1529 
1530 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1531 {
1532   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1533 
1534   PetscFunctionBegin;
1535   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1536   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1537   a->vecinuse = 0;
1538   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1539   PetscCall(VecResetArray(a->cvec));
1540   if (v) *v = NULL;
1541   PetscFunctionReturn(PETSC_SUCCESS);
1542 }
1543 
/* Expose the contiguous submatrix A(rbegin:rend, cbegin:cend) as a Mat that
   shares storage with A. The wrapper matrix a->cmat is created lazily and
   reused across calls; its layouts are rebuilt only when the requested sizes
   changed. Must be matched by MatDenseRestoreSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the global row range [rbegin,rend) against this rank's owned rows,
     expressed in local indices */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first call: create the wrapper matrix and set up its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse path: rebuild layouts only if the requested sizes differ from the
       cached wrapper; the local-size comparison must agree on all ranks */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the inner sequential block of the wrapper aliases A's local storage */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored +1 so that cbegin == 0 still reads as "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1601 
/* Release the submatrix obtained with MatDenseGetSubMatrix_MPIDense():
   returns the inner sequential view and clears the in-use flag. The wrapper
   matrix a->cmat itself is kept for reuse by the next Get call. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the view may have been modified on device; propagate the offload state */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1620 
1621 /*MC
1622    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1623 
1624    Options Database Key:
1625 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1626 
1627   Level: beginner
1628 
1629 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1630 M*/
/* MatCreate_MPIDense - constructor for the MATMPIDENSE type: allocates the
   type-specific data, installs the function table and the stash used for
   off-process MatSetValues(), and composes all method, conversion, and
   product callbacks looked up by name elsewhere in PETSc. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MATMPIDENSE operation table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific accessors: LDA, raw array access, column vectors, submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-matrix product setup with (device) sparse matrices */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1699 
1700 /*MC
1701    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1702 
1703    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1704    and `MATMPIDENSE` otherwise.
1705 
1706    Options Database Key:
1707 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1708 
1709   Level: beginner
1710 
1711 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1712 M*/
1713 
/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data.  Set to `NULL` for PETSc
   to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense when B is MATMPIDENSE;
     silently does nothing for other types (PetscTryMethod) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1743 
1744 /*@
1745   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1746   array provided by the user. This is useful to avoid copying an array
1747   into a matrix
1748 
1749   Not Collective
1750 
1751   Input Parameters:
1752 + mat   - the matrix
1753 - array - the array in column major order
1754 
1755   Level: developer
1756 
1757   Note:
1758   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1759   freed when the matrix is destroyed.
1760 
1761 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1762           `MatDenseReplaceArray()`
1763 @*/
1764 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1765 {
1766   PetscFunctionBegin;
1767   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1768   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1769   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1770 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1771   mat->offloadmask = PETSC_OFFLOAD_CPU;
1772 #endif
1773   PetscFunctionReturn(PETSC_SUCCESS);
1774 }
1775 
1776 /*@
1777   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1778 
1779   Not Collective
1780 
1781   Input Parameter:
1782 . mat - the matrix
1783 
1784   Level: developer
1785 
1786   Note:
1787   You can only call this after a call to `MatDensePlaceArray()`
1788 
1789 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1790 @*/
1791 PetscErrorCode MatDenseResetArray(Mat mat)
1792 {
1793   PetscFunctionBegin;
1794   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1795   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1796   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1797   PetscFunctionReturn(PETSC_SUCCESS);
1798 }
1799 
1800 /*@
1801   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1802   array provided by the user. This is useful to avoid copying an array
1803   into a matrix
1804 
1805   Not Collective
1806 
1807   Input Parameters:
1808 + mat   - the matrix
1809 - array - the array in column major order
1810 
1811   Level: developer
1812 
1813   Note:
1814   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1815   freed by the user. It will be freed when the matrix is destroyed.
1816 
1817 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1818 @*/
1819 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1820 {
1821   PetscFunctionBegin;
1822   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1823   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1824   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1825 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1826   mat->offloadmask = PETSC_OFFLOAD_CPU;
1827 #endif
1828   PetscFunctionReturn(PETSC_SUCCESS);
1829 }
1830 
/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* MATDENSE resolves to MATSEQDENSE or MATMPIDENSE depending on the
     communicator size; only the matching preallocation below takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1876 
1877 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1878 {
1879   Mat           mat;
1880   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1881 
1882   PetscFunctionBegin;
1883   *newmat = NULL;
1884   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1885   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1886   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1887   a = (Mat_MPIDense *)mat->data;
1888 
1889   mat->factortype   = A->factortype;
1890   mat->assembled    = PETSC_TRUE;
1891   mat->preallocated = PETSC_TRUE;
1892 
1893   mat->insertmode = NOT_SET_VALUES;
1894   a->donotstash   = oldmat->donotstash;
1895 
1896   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1897   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1898 
1899   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1900 
1901   *newmat = mat;
1902   PetscFunctionReturn(PETSC_SUCCESS);
1903 }
1904 
/* Load a parallel dense matrix from a viewer; binary always supported,
   HDF5 only when PETSc was configured with it. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1930 
1931 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1932 {
1933   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1934   Mat           a, b;
1935 
1936   PetscFunctionBegin;
1937   a = matA->A;
1938   b = matB->A;
1939   PetscCall(MatEqual(a, b, flag));
1940   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1941   PetscFunctionReturn(PETSC_SUCCESS);
1942 }
1943 
1944 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1945 {
1946   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1947 
1948   PetscFunctionBegin;
1949   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1950   PetscCall(MatDestroy(&atb->atb));
1951   PetscCall(PetscFree(atb));
1952   PetscFunctionReturn(PETSC_SUCCESS);
1953 }
1954 
1955 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1956 {
1957   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1958 
1959   PetscFunctionBegin;
1960   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1961   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1962   PetscCall(PetscFree(abt));
1963   PetscFunctionReturn(PETSC_SUCCESS);
1964 }
1965 
/* Numeric phase of C = A^T * B for parallel dense matrices: each rank computes
   the full-size local product aseq^T * bseq, then the partial products are
   summed and distributed to C's row owners with MPI_Reduce_scatter. The
   buffers and the intermediate matrix live in the product context created by
   the symbolic phase. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, pack the rows it
     owns, column by column, so Reduce_scatter delivers contiguous segments */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2011 
2012 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2013 {
2014   MPI_Comm               comm;
2015   PetscMPIInt            size;
2016   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2017   Mat_TransMatMultDense *atb;
2018   PetscBool              cisdense = PETSC_FALSE;
2019   PetscInt               i;
2020   const PetscInt        *ranges;
2021 
2022   PetscFunctionBegin;
2023   MatCheckProduct(C, 4);
2024   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2025   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2026   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2027     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2028   }
2029 
2030   /* create matrix product C */
2031   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2032 #if defined(PETSC_HAVE_CUDA)
2033   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2034 #endif
2035 #if defined(PETSC_HAVE_HIP)
2036   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2037 #endif
2038   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2039   PetscCall(MatSetUp(C));
2040 
2041   /* create data structure for reuse C */
2042   PetscCallMPI(MPI_Comm_size(comm, &size));
2043   PetscCall(PetscNew(&atb));
2044   cM = C->rmap->N;
2045   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2046   PetscCall(MatGetOwnershipRanges(C, &ranges));
2047   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2048 
2049   C->product->data    = atb;
2050   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2051   PetscFunctionReturn(PETSC_SUCCESS);
2052 }
2053 
/* Symbolic phase of C = A * B^T for parallel dense matrices: sets C's sizes
   and type, selects the communication algorithm ("allgatherv" -> alg 0,
   otherwise the cyclic send/recv scheme -> alg 1), and allocates the buffers
   the numeric phase reuses. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A * B^T requires matching local column counts */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic algorithm's messages from clashing with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — double buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds the local block, buf[1] the gathered global B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2105 
/* Numeric phase of C = A*B^T using a ring ("cyclic") exchange: each rank's row
   block of B travels around the ring; on every step each rank multiplies its
   local A against the block it currently holds, filling the corresponding
   columns of its local block of C.  Communication of the next block is
   overlapped with the local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* cache the leading dimensions of the three local arrays as BLAS ints */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is stored contiguously (lda equals the local row count), so it can be sent as-is */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact local B column by column into buf[0], skipping the lda padding */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next one */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* the first block processed is our own */
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending; bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate between the two ping-pong buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T: fills the columns of local C owned by rank recvisfrom's rows of B */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); /* entries were computed locally; skip off-proc stash */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2189 
2190 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2191 {
2192   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2193   Mat_MatTransMultDense *abt;
2194   MPI_Comm               comm;
2195   PetscMPIInt            size;
2196   PetscScalar           *cv, *sendbuf, *recvbuf;
2197   const PetscScalar     *av, *bv;
2198   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2199   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2200   PetscBLASInt           cm, cn, ck, alda, clda;
2201 
2202   PetscFunctionBegin;
2203   MatCheckProduct(C, 3);
2204   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2205   abt = (Mat_MatTransMultDense *)C->product->data;
2206   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2207   PetscCallMPI(MPI_Comm_size(comm, &size));
2208   PetscCall(MatDenseGetArrayRead(a->A, &av));
2209   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2210   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2211   PetscCall(MatDenseGetLDA(a->A, &i));
2212   PetscCall(PetscBLASIntCast(i, &alda));
2213   PetscCall(MatDenseGetLDA(b->A, &blda));
2214   PetscCall(MatDenseGetLDA(c->A, &i));
2215   PetscCall(PetscBLASIntCast(i, &clda));
2216   /* copy transpose of B into buf[0] */
2217   bn      = B->rmap->n;
2218   sendbuf = abt->buf[0];
2219   recvbuf = abt->buf[1];
2220   for (k = 0, j = 0; j < bn; j++) {
2221     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2222   }
2223   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2224   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2225   PetscCall(PetscBLASIntCast(cK, &ck));
2226   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2227   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2228   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2229   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2230   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2231   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2232   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2233   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2234   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2235   PetscFunctionReturn(PETSC_SUCCESS);
2236 }
2237 
2238 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2239 {
2240   Mat_MatTransMultDense *abt;
2241 
2242   PetscFunctionBegin;
2243   MatCheckProduct(C, 3);
2244   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2245   abt = (Mat_MatTransMultDense *)C->product->data;
2246   switch (abt->alg) {
2247   case 1:
2248     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2249     break;
2250   default:
2251     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2252     break;
2253   }
2254   PetscFunctionReturn(PETSC_SUCCESS);
2255 }
2256 
2257 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2258 {
2259   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2260 
2261   PetscFunctionBegin;
2262   PetscCall(MatDestroy(&ab->Ce));
2263   PetscCall(MatDestroy(&ab->Ae));
2264   PetscCall(MatDestroy(&ab->Be));
2265   PetscCall(PetscFree(ab));
2266   PetscFunctionReturn(PETSC_SUCCESS);
2267 }
2268 
/* Numeric phase of C = A*B for MPI dense matrices.  Dispatches on the data
   built by the symbolic phase: if the Elemental mirrors Ae/Ce exist, the
   product is computed through Elemental; otherwise ("petsc" algorithm) the
   needed rows of B are gathered on each rank column-by-column via the star
   forest Mvctx, followed by a local sequential GEMM. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    /* refresh the Elemental copies of A and B, multiply there, and copy the result back into C */
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* broadcast each column of B into the matching column of the local replicated Be */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* local C = local A * gathered Be, all sequential dense */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)(A->data))->A, ab->Be, ((Mat_MPIDense *)(C->data))->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2306 
2307 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2308 {
2309   Mat_Product      *product = C->product;
2310   PetscInt          alg;
2311   Mat_MatMultDense *ab;
2312   PetscBool         flg;
2313 
2314   PetscFunctionBegin;
2315   MatCheckProduct(C, 4);
2316   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2317   /* check local size of A and B */
2318   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2319              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2320 
2321   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2322   alg = flg ? 0 : 1;
2323 
2324   /* setup C */
2325   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2326   PetscCall(MatSetType(C, MATMPIDENSE));
2327   PetscCall(MatSetUp(C));
2328 
2329   /* create data structure for reuse Cdense */
2330   PetscCall(PetscNew(&ab));
2331 
2332   switch (alg) {
2333   case 1: /* alg: "elemental" */
2334 #if PetscDefined(HAVE_ELEMENTAL)
2335     /* create elemental matrices Ae and Be */
2336     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2337     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2338     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2339     PetscCall(MatSetUp(ab->Ae));
2340     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2341 
2342     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2343     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2344     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2345     PetscCall(MatSetUp(ab->Be));
2346     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2347 
2348     /* compute symbolic Ce = Ae*Be */
2349     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2350     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2351 #else
2352     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2353 #endif
2354     break;
2355   default: /* alg: "petsc" */
2356     ab->Ae = NULL;
2357     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2358     ab->Ce = NULL;
2359     break;
2360   }
2361 
2362   C->product->data       = ab;
2363   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2364   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2365   PetscFunctionReturn(PETSC_SUCCESS);
2366 }
2367 
2368 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2369 {
2370   Mat_Product *product     = C->product;
2371   const char  *algTypes[2] = {"petsc", "elemental"};
2372   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2373   PetscBool    flg = PETSC_FALSE;
2374   PetscFunctionBegin;
2375 
2376   /* Set default algorithm */
2377   alg = 0; /* default is petsc */
2378   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2379   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2380 
2381   /* Get runtime option */
2382   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2383   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2384   PetscOptionsEnd();
2385   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2386 
2387   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2388   C->ops->productsymbolic = MatProductSymbolic_AB;
2389   PetscFunctionReturn(PETSC_SUCCESS);
2390 }
2391 
2392 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2393 {
2394   Mat_Product *product = C->product;
2395   Mat          A = product->A, B = product->B;
2396 
2397   PetscFunctionBegin;
2398   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2399              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2400   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2401   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2402   PetscFunctionReturn(PETSC_SUCCESS);
2403 }
2404 
2405 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2406 {
2407   Mat_Product *product     = C->product;
2408   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2409   PetscInt     alg, nalg = 2;
2410   PetscBool    flg = PETSC_FALSE;
2411 
2412   PetscFunctionBegin;
2413   /* Set default algorithm */
2414   alg = 0; /* default is allgatherv */
2415   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2416   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2417 
2418   /* Get runtime option */
2419   if (product->api_user) {
2420     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2421     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2422     PetscOptionsEnd();
2423   } else {
2424     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2425     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2426     PetscOptionsEnd();
2427   }
2428   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2429 
2430   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2431   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2432   PetscFunctionReturn(PETSC_SUCCESS);
2433 }
2434 
2435 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2436 {
2437   Mat_Product *product = C->product;
2438 
2439   PetscFunctionBegin;
2440   switch (product->type) {
2441   case MATPRODUCT_AB:
2442     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2443     break;
2444   case MATPRODUCT_AtB:
2445     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2446     break;
2447   case MATPRODUCT_ABt:
2448     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2449     break;
2450   default:
2451     break;
2452   }
2453   PetscFunctionReturn(PETSC_SUCCESS);
2454 }
2455