xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 1b37a2a7cc4a4fb30c3e967db1c694c0a1013f51)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
13   matrix that represents the operator. For sequential matrices it returns itself.
14 
15   Input Parameter:
16 . A - the sequential or MPI `MATDENSE` matrix
17 
18   Output Parameter:
19 . B - the inner matrix
20 
21   Level: intermediate
22 
23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26 {
27   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28   PetscBool     flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
32   PetscAssertPointer(B, 2);
33   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34   if (flg) *B = mat->A;
35   else {
36     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38     *B = A;
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44 {
45   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
47 
48   PetscFunctionBegin;
49   PetscCall(MatCopy(Amat->A, Bmat->A, s));
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54 {
55   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57   PetscScalar  *v;
58 
59   PetscFunctionBegin;
60   PetscCall(MatDenseGetArray(mat->A, &v));
61   PetscCall(MatDenseGetLDA(mat->A, &lda));
62   rend2 = PetscMin(rend, A->cmap->N);
63   if (rend2 > rstart) {
64     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65     PetscCall(PetscLogFlops(rend2 - rstart));
66   }
67   PetscCall(MatDenseRestoreArray(mat->A, &v));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72 {
73   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
75 
76   PetscFunctionBegin;
77   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78   lrow = row - rstart;
79   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84 {
85   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
87 
88   PetscFunctionBegin;
89   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90   lrow = row - rstart;
91   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  /* Return a sequential dense matrix that aliases the diagonal block of the
     locally owned rows of A; the block is created once and cached on A under
     the name "DiagonalBlock". */
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B shares storage with mdn->A, starting at column rstart.
       NOTE(review): the offset m * rstart assumes the local leading dimension
       equals m (the local row count) — confirm this holds when MatDenseSetLDA
       has been used. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* PetscObjectCompose took a reference; drop ours so the cache owns B */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
129 
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  /* Insert/add an m-by-n logically dense block of values: locally owned rows go
     straight into the inner sequential matrix; off-process rows are stashed for
     communication during assembly. Negative indices are silently skipped. */
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the block starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: entry (i,j) of the block is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: record it in the stash; the matrix must be reassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
162 
163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164 {
165   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
167 
168   PetscFunctionBegin;
169   for (i = 0; i < m; i++) {
170     if (idxm[i] < 0) continue; /* negative row */
171     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j = 0; j < n; j++) {
175         if (idxn[j] < 0) continue; /* negative column */
176         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178       }
179     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180   }
181   PetscFunctionReturn(PETSC_SUCCESS);
182 }
183 
184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185 {
186   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
187 
188   PetscFunctionBegin;
189   PetscCall(MatDenseGetLDA(a->A, lda));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  /* Set the leading dimension of the local array. If the inner sequential
     matrix does not exist yet it is created first (of matching GPU type when A
     is a CUDA/HIP dense matrix) so that the LDA can be set before
     preallocation. */
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds all global columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
219 
220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221 {
222   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226   PetscCall(MatDenseGetArray(a->A, array));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
231 {
232   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
233 
234   PetscFunctionBegin;
235   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241 {
242   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
243 
244   PetscFunctionBegin;
245   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246   PetscCall(MatDenseGetArrayWrite(a->A, array));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251 {
252   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
253 
254   PetscFunctionBegin;
255   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257   PetscCall(MatDensePlaceArray(a->A, array));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262 {
263   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
264 
265   PetscFunctionBegin;
266   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268   PetscCall(MatDenseResetArray(a->A));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273 {
274   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
275 
276   PetscFunctionBegin;
277   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279   PetscCall(MatDenseReplaceArray(a->A, array));
280   PetscFunctionReturn(PETSC_SUCCESS);
281 }
282 
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  /* Extract the submatrix A[isrow, iscol] into a new (or reused) parallel dense
     matrix. The column index set is gathered so every process knows the full
     list of selected columns; rows must follow the original partitioning. */
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs all selected column indices to fill its local rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at column icol[i] of the source (column-major with stride lda);
       bv walks the destination contiguously, one column after another */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351 {
352   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDenseRestoreArray(a->A, array));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
360 {
361   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
362 
363   PetscFunctionBegin;
364   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369 {
370   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
371 
372   PetscFunctionBegin;
373   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  /* Start communicating stashed off-process entries to their owning ranks. */
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* nothing to communicate when stashing is disabled or no off-process entries exist */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
390 
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  /* Receive stashed entries from other ranks, insert them into the locally
     owned rows, then assemble the inner sequential matrix. */
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
424 
425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426 {
427   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
428 
429   PetscFunctionBegin;
430   PetscCall(MatZeroEntries(l->A));
431   PetscFunctionReturn(PETSC_SUCCESS);
432 }
433 
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  /* Zero the given global rows, optionally placing diag on their diagonal and
     fixing b so that b[row] = diag * x[row] for the zeroed rows. */
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows with zero diagonal first; the diagonal is set separately below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* insert diag on the whole diagonal of A (the zeroed rows now hold it) */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
465 
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  /* y = A x: broadcast the entries of x into the local work vector lvec via the
     SF scatter (memtype-aware, so it works for host or device arrays), then
     apply the local sequential mat-vec. */
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the scatter context on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
484 
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  /* z = y + A x: same gather of x into lvec as MatMult_MPIDense, followed by
     the local multadd. */
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the scatter context on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
503 
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  /* Shared kernel for y = A^T x (herm == PETSC_FALSE) and y = A^H x
     (herm == PETSC_TRUE): apply the local (Hermitian) transpose into lvec,
     then sum the partial results across ranks with an SF reduce into yy. */
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscFunctionBegin;
  /* build the scatter context on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* yy accumulates the reduction, so it must start at zero */
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
523 
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  /* Shared kernel for z = y + A^T x (herm == PETSC_FALSE) and z = y + A^H x
     (herm == PETSC_TRUE): seed zz with yy, then reduce the per-rank partial
     transpose products into it. */
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* build the scatter context on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* zz starts as yy so the SF reduction adds A^T x on top of it */
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
544 
/* y = A^T x */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
551 
/* z = y + A^T x */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
558 
/* y = A^H x */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
565 
/* z = y + A^H x */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
572 
573 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
574 {
575   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
576   PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
577   PetscScalar       *x;
578   const PetscScalar *av;
579 
580   PetscFunctionBegin;
581   PetscCall(VecGetArray(v, &x));
582   PetscCall(VecGetSize(v, &ng));
583   PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
584   PetscCall(VecGetLocalSize(v, &nl));
585   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
586   radd = A->rmap->rstart * m;
587   PetscCall(MatDenseGetArrayRead(a->A, &av));
588   PetscCall(MatDenseGetLDA(a->A, &lda));
589   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
590   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
591   PetscCall(PetscArrayzero(x + i, nl - i));
592   PetscCall(VecRestoreArray(v, &x));
593   PetscFunctionReturn(PETSC_SUCCESS);
594 }
595 
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  /* Tear down an MPI dense matrix: free the inner objects, release the data
     struct, and clear every composed function/object so the type can be
     changed or the object reused. */
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* inner objects: local block, scatter work vector, SF, cached column vec/mat */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* clear all composed functions registered by MatCreate_MPIDense and friends */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block (see MatGetDiagonalBlock_MPIDense) */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
690 
691 #include <petscdraw.h>
692 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
693 {
694   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
695   PetscMPIInt       rank;
696   PetscViewerType   vtype;
697   PetscBool         iascii, isdraw;
698   PetscViewer       sviewer;
699   PetscViewerFormat format;
700 
701   PetscFunctionBegin;
702   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
703   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
704   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
705   if (iascii) {
706     PetscCall(PetscViewerGetType(viewer, &vtype));
707     PetscCall(PetscViewerGetFormat(viewer, &format));
708     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
709       MatInfo info;
710       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
711       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
712       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
713                                                    (PetscInt)info.memory));
714       PetscCall(PetscViewerFlush(viewer));
715       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
716       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
717       PetscFunctionReturn(PETSC_SUCCESS);
718     } else if (format == PETSC_VIEWER_ASCII_INFO) {
719       PetscFunctionReturn(PETSC_SUCCESS);
720     }
721   } else if (isdraw) {
722     PetscDraw draw;
723     PetscBool isnull;
724 
725     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
726     PetscCall(PetscDrawIsNull(draw, &isnull));
727     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
728   }
729 
730   {
731     /* assemble the entire matrix onto first processor. */
732     Mat          A;
733     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
734     PetscInt    *cols;
735     PetscScalar *vals;
736 
737     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
738     if (rank == 0) {
739       PetscCall(MatSetSizes(A, M, N, M, N));
740     } else {
741       PetscCall(MatSetSizes(A, 0, 0, M, N));
742     }
743     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
744     PetscCall(MatSetType(A, MATMPIDENSE));
745     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
746 
747     /* Copy the matrix ... This isn't the most efficient means,
748        but it's quick for now */
749     A->insertmode = INSERT_VALUES;
750 
751     row = mat->rmap->rstart;
752     m   = mdn->A->rmap->n;
753     for (i = 0; i < m; i++) {
754       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
755       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
756       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
757       row++;
758     }
759 
760     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
761     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
762     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
763     if (rank == 0) {
764       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
765       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
766     }
767     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
768     PetscCall(MatDestroy(&A));
769   }
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
774 {
775   PetscBool iascii, isbinary, isdraw, issocket;
776 
777   PetscFunctionBegin;
778   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
779   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
780   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
781   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
782 
783   if (iascii || issocket || isdraw) {
784     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
785   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
786   PetscFunctionReturn(PETSC_SUCCESS);
787 }
788 
789 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
790 {
791   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
792   Mat            mdn = mat->A;
793   PetscLogDouble isend[5], irecv[5];
794 
795   PetscFunctionBegin;
796   info->block_size = 1.0;
797 
798   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
799 
800   isend[0] = info->nz_used;
801   isend[1] = info->nz_allocated;
802   isend[2] = info->nz_unneeded;
803   isend[3] = info->memory;
804   isend[4] = info->mallocs;
805   if (flag == MAT_LOCAL) {
806     info->nz_used      = isend[0];
807     info->nz_allocated = isend[1];
808     info->nz_unneeded  = isend[2];
809     info->memory       = isend[3];
810     info->mallocs      = isend[4];
811   } else if (flag == MAT_GLOBAL_MAX) {
812     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
813 
814     info->nz_used      = irecv[0];
815     info->nz_allocated = irecv[1];
816     info->nz_unneeded  = irecv[2];
817     info->memory       = irecv[3];
818     info->mallocs      = irecv[4];
819   } else if (flag == MAT_GLOBAL_SUM) {
820     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
821 
822     info->nz_used      = irecv[0];
823     info->nz_allocated = irecv[1];
824     info->nz_unneeded  = irecv[2];
825     info->memory       = irecv[3];
826     info->mallocs      = irecv[4];
827   }
828   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
829   info->fill_ratio_needed = 0;
830   info->factor_mallocs    = 0;
831   PetscFunctionReturn(PETSC_SUCCESS);
832 }
833 
/*
  Handles MatSetOption() for MPIDENSE.  Structure-related options are
  forwarded to the local sequential block; options that have no meaning for a
  dense format are logged and ignored; unknown options raise an error.
  NOTE: MatCheckPreallocated() is a macro that may return early if the matrix
  has not been preallocated.
*/
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the local (sequential) block */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remembered on the MPI level too: MatSetValues interprets input order with it */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* meaningless for a dense format; accepted for interface compatibility */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process MatSetValues entries are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
877 
/*
  Computes A = diag(ll) * A * diag(rr); either vector may be NULL to skip that
  side.  The local block is stored column-major with leading dimension `lda`,
  so row scaling strides by lda while column scaling walks columns
  contiguously.  The right-scaling vector is distributed by columns and must
  first be broadcast to every rank (each rank stores all N columns locally),
  which is done with the Mvctx star forest into the lvec work vector.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* scale local row i by l[i]: entries of a row are lda apart in memory */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* gather the full right-scaling vector on every rank via the star forest */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* scale column i by r[i]: a column is contiguous starting at vv + i*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
926 
927 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
928 {
929   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
930   PetscInt           i, j;
931   PetscMPIInt        size;
932   PetscReal          sum = 0.0;
933   const PetscScalar *av, *v;
934 
935   PetscFunctionBegin;
936   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
937   v = av;
938   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
939   if (size == 1) {
940     PetscCall(MatNorm(mdn->A, type, nrm));
941   } else {
942     if (type == NORM_FROBENIUS) {
943       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
944         sum += PetscRealPart(PetscConj(*v) * (*v));
945         v++;
946       }
947       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
948       *nrm = PetscSqrtReal(*nrm);
949       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
950     } else if (type == NORM_1) {
951       PetscReal *tmp, *tmp2;
952       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
953       *nrm = 0.0;
954       v    = av;
955       for (j = 0; j < mdn->A->cmap->n; j++) {
956         for (i = 0; i < mdn->A->rmap->n; i++) {
957           tmp[j] += PetscAbsScalar(*v);
958           v++;
959         }
960       }
961       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
962       for (j = 0; j < A->cmap->N; j++) {
963         if (tmp2[j] > *nrm) *nrm = tmp2[j];
964       }
965       PetscCall(PetscFree2(tmp, tmp2));
966       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
967     } else if (type == NORM_INFINITY) { /* max row norm */
968       PetscReal ntemp;
969       PetscCall(MatNorm(mdn->A, type, &ntemp));
970       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
971     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
972   }
973   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
974   PetscFunctionReturn(PETSC_SUCCESS);
975 }
976 
/*
  Computes the transpose of a parallel dense matrix for all three MatReuse
  modes.  For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a new matrix with the
  flipped layout is created; for MAT_REUSE_MATRIX the caller-provided *matout
  is filled.  The local block's columns become global rows of the result,
  communicated through the generic MatSetValues/assembly path.  For
  MAT_INPLACE_MATRIX the result is merged back into A's header.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* the transpose swaps the row/column layouts of A */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork = global row indices owned by this rank; they become column
     indices of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* column j of the local block (contiguous, stride lda between columns)
     becomes this rank's contribution to global row j of B */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's while keeping A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1015 
1016 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1017 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1018 
/*
  Finalizes the parallel layouts and, unless the caller already preallocated,
  allocates the local dense storage with default (PETSc-managed) memory.
*/
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1027 
1028 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1029 {
1030   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1031 
1032   PetscFunctionBegin;
1033   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1038 {
1039   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1040 
1041   PetscFunctionBegin;
1042   PetscCall(MatConjugate(a->A));
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
1045 
1046 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1047 {
1048   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1049 
1050   PetscFunctionBegin;
1051   PetscCall(MatRealPart(a->A));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1056 {
1057   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1058 
1059   PetscFunctionBegin;
1060   PetscCall(MatImaginaryPart(a->A));
1061   PetscFunctionReturn(PETSC_SUCCESS);
1062 }
1063 
1064 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1065 {
1066   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1067 
1068   PetscFunctionBegin;
1069   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1070   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1071   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1072   PetscFunctionReturn(PETSC_SUCCESS);
1073 }
1074 
1075 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1076 
1077 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1078 {
1079   PetscInt      i, m, n;
1080   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1081   PetscReal    *work;
1082 
1083   PetscFunctionBegin;
1084   PetscCall(MatGetSize(A, &m, &n));
1085   PetscCall(PetscMalloc1(n, &work));
1086   if (type == REDUCTION_MEAN_REALPART) {
1087     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1088   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1089     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1090   } else {
1091     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1092   }
1093   if (type == NORM_2) {
1094     for (i = 0; i < n; i++) work[i] *= work[i];
1095   }
1096   if (type == NORM_INFINITY) {
1097     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1098   } else {
1099     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1100   }
1101   PetscCall(PetscFree(work));
1102   if (type == NORM_2) {
1103     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1104   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1105     for (i = 0; i < n; i++) reductions[i] /= m;
1106   }
1107   PetscFunctionReturn(PETSC_SUCCESS);
1108 }
1109 
1110 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1111 {
1112   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1113 
1114   PetscFunctionBegin;
1115   PetscCall(MatSetRandom(d->A, rctx));
1116 #if defined(PETSC_HAVE_DEVICE)
1117   x->offloadmask = d->A->offloadmask;
1118 #endif
1119   PetscFunctionReturn(PETSC_SUCCESS);
1120 }
1121 
/*
  A dense matrix stores every entry explicitly, so a diagonal entry can never
  be structurally missing; `d` (index of the first missing diagonal) is left
  untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1128 
1129 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1130 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1131 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1132 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1133 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1134 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1135 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1136 
/*
  Positional function table for MATMPIDENSE.  Slot order must match
  struct _MatOps (see petsc/private/matimpl.h); the /\* nn *\/ comments mark
  slot indices.  A NULL slot means the operation is unsupported or inherited.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL};
1290 
/*
  Allocates (or adopts, when `data` is non-NULL) the storage for the local
  sequential block.  The local block is mat->rmap->n x mat->cmap->N: every
  rank stores all global columns of its own rows.  The sequential type is
  chosen to match the MPI type (plain, CUDA, or HIP).
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block holds this rank's rows but ALL global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* mirror the local block's host/device residency at the MPI level */
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1323 
1324 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1325 {
1326   Mat B, C;
1327 
1328   PetscFunctionBegin;
1329   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1330   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1331   PetscCall(MatDestroy(&C));
1332   if (reuse == MAT_REUSE_MATRIX) {
1333     C = *newmat;
1334   } else C = NULL;
1335   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1336   PetscCall(MatDestroy(&B));
1337   if (reuse == MAT_INPLACE_MATRIX) {
1338     PetscCall(MatHeaderReplace(A, &C));
1339   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1340   PetscFunctionReturn(PETSC_SUCCESS);
1341 }
1342 
1343 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1344 {
1345   Mat B, C;
1346 
1347   PetscFunctionBegin;
1348   PetscCall(MatDenseGetLocalMatrix(A, &C));
1349   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1350   if (reuse == MAT_REUSE_MATRIX) {
1351     C = *newmat;
1352   } else C = NULL;
1353   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1354   PetscCall(MatDestroy(&B));
1355   if (reuse == MAT_INPLACE_MATRIX) {
1356     PetscCall(MatHeaderReplace(A, &C));
1357   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1358   PetscFunctionReturn(PETSC_SUCCESS);
1359 }
1360 
1361 #if defined(PETSC_HAVE_ELEMENTAL)
/*
  Converts an MPIDENSE matrix to MATELEMENTAL.  The local block (this rank's
  rows, all N columns) is injected with MatSetValues; when the leading
  dimension equals the local row count the whole block goes in one call,
  otherwise one call per column skips the lda padding.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* values are supplied column by column below */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* global indices of this rank's rows and of all columns */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* lda > m: columns are not contiguous, insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1404 #endif
1405 
1406 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1407 {
1408   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1409 
1410   PetscFunctionBegin;
1411   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1412   PetscFunctionReturn(PETSC_SUCCESS);
1413 }
1414 
1415 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1416 {
1417   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1418 
1419   PetscFunctionBegin;
1420   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1421   PetscFunctionReturn(PETSC_SUCCESS);
1422 }
1423 
1424 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1425 {
1426   Mat_MPIDense *mat;
1427   PetscInt      m, nloc, N;
1428 
1429   PetscFunctionBegin;
1430   PetscCall(MatGetSize(inmat, &m, &N));
1431   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1432   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1433     PetscInt sum;
1434 
1435     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1436     /* Check sum(n) = N */
1437     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1438     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1439 
1440     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1441     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1442   }
1443 
1444   /* numeric phase */
1445   mat = (Mat_MPIDense *)(*outmat)->data;
1446   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1447   PetscFunctionReturn(PETSC_SUCCESS);
1448 }
1449 
1450 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1451 {
1452   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1453   PetscInt      lda;
1454 
1455   PetscFunctionBegin;
1456   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1457   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1458   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1459   a->vecinuse = col + 1;
1460   PetscCall(MatDenseGetLDA(a->A, &lda));
1461   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1462   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1463   *v = a->cvec;
1464   PetscFunctionReturn(PETSC_SUCCESS);
1465 }
1466 
1467 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1468 {
1469   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1470 
1471   PetscFunctionBegin;
1472   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1473   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1474   a->vecinuse = 0;
1475   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1476   PetscCall(VecResetArray(a->cvec));
1477   if (v) *v = NULL;
1478   PetscFunctionReturn(PETSC_SUCCESS);
1479 }
1480 
1481 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1482 {
1483   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1484   PetscInt      lda;
1485 
1486   PetscFunctionBegin;
1487   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1488   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1489   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1490   a->vecinuse = col + 1;
1491   PetscCall(MatDenseGetLDA(a->A, &lda));
1492   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1493   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1494   PetscCall(VecLockReadPush(a->cvec));
1495   *v = a->cvec;
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1500 {
1501   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1502 
1503   PetscFunctionBegin;
1504   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1505   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1506   a->vecinuse = 0;
1507   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1508   PetscCall(VecLockReadPop(a->cvec));
1509   PetscCall(VecResetArray(a->cvec));
1510   if (v) *v = NULL;
1511   PetscFunctionReturn(PETSC_SUCCESS);
1512 }
1513 
1514 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1515 {
1516   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1517   PetscInt      lda;
1518 
1519   PetscFunctionBegin;
1520   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1521   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1522   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1523   a->vecinuse = col + 1;
1524   PetscCall(MatDenseGetLDA(a->A, &lda));
1525   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1526   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1527   *v = a->cvec;
1528   PetscFunctionReturn(PETSC_SUCCESS);
1529 }
1530 
1531 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1532 {
1533   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1534 
1535   PetscFunctionBegin;
1536   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1537   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1538   a->vecinuse = 0;
1539   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1540   PetscCall(VecResetArray(a->cvec));
1541   if (v) *v = NULL;
1542   PetscFunctionReturn(PETSC_SUCCESS);
1543 }
1544 
/* Return a view (no copy) of rows [rbegin,rend) x columns [cbegin,cend) of A as a
   cached parallel dense matrix; must be matched by MatDenseRestoreSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* only one column vector / submatrix may be checked out at a time */
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range [rbegin,rend) against this rank's ownership,
     expressed as local indices into the local block */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first checkout: build the cached submatrix shell with the requested layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached shell exists: rebuild its layouts only if the requested shape differs;
       the row check must agree on all ranks, hence the allreduce */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* let the sequential layer take a view of the local rows/columns */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored +1 so 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1602 
/* Return the submatrix view obtained with MatDenseGetSubMatrix_MPIDense() and clear
   the in-use flag; the cached shell a->cmat is kept for future checkouts. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  /* release the sequential view held by the cached shell */
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the view may have been modified on the device; mirror the local block's mask */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1621 
1622 /*MC
1623    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1624 
1625    Options Database Key:
1626 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1627 
1628   Level: beginner
1629 
1630 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1631 M*/
/* Constructor for MATMPIDENSE: allocates the per-matrix data structure, installs the
   operation table, and composes the type-specific method implementations. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* array/LDA access */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions (some only when the external package is configured) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix products with (GPU) AIJ matrices */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1700 
1701 /*MC
1702    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1703 
1704    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1705    and `MATMPIDENSE` otherwise.
1706 
1707    Options Database Key:
1708 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1709 
1710   Level: beginner
1711 
1712 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1713 M*/
1714 
1715 /*@C
1716   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1717 
1718   Collective
1719 
1720   Input Parameters:
1721 + B    - the matrix
1722 - data - optional location of matrix data.  Set to `NULL` for PETSc
1723    to control all matrix memory allocation.
1724 
1725   Level: intermediate
1726 
1727   Notes:
1728   The dense format is fully compatible with standard Fortran
1729   storage by columns.
1730 
1731   The data input variable is intended primarily for Fortran programmers
1732   who wish to allocate their own matrix memory space.  Most users should
1733   set `data` to `NULL`.
1734 
1735 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1736 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the type-specific implementation; a no-op for non-MPIDense types */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1744 
1745 /*@
1746   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1747   array provided by the user. This is useful to avoid copying an array
1748   into a matrix
1749 
1750   Not Collective
1751 
1752   Input Parameters:
1753 + mat   - the matrix
1754 - array - the array in column major order
1755 
1756   Level: developer
1757 
1758   Note:
1759   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1760   freed when the matrix is destroyed.
1761 
1762 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1763           `MatDenseReplaceArray()`
1764 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* values changed: bump the object state and mark the host copy as current */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1776 
1777 /*@
1778   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1779 
1780   Not Collective
1781 
1782   Input Parameter:
1783 . mat - the matrix
1784 
1785   Level: developer
1786 
1787   Note:
1788   You can only call this after a call to `MatDensePlaceArray()`
1789 
1790 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1791 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* the matrix again refers to its original storage, so its values may have changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1800 
1801 /*@
1802   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1803   array provided by the user. This is useful to avoid copying an array
1804   into a matrix
1805 
1806   Not Collective
1807 
1808   Input Parameters:
1809 + mat   - the matrix
1810 - array - the array in column major order
1811 
1812   Level: developer
1813 
1814   Note:
1815   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1816   freed by the user. It will be freed when the matrix is destroyed.
1817 
1818 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1819 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* values changed: bump the object state and mark the host copy as current */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1831 
1832 /*@C
1833   MatCreateDense - Creates a matrix in `MATDENSE` format.
1834 
1835   Collective
1836 
1837   Input Parameters:
1838 + comm - MPI communicator
1839 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1840 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1841 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1842 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1843 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1844    to control all matrix memory allocation.
1845 
1846   Output Parameter:
1847 . A - the matrix
1848 
1849   Level: intermediate
1850 
1851   Notes:
1852   The dense format is fully compatible with standard Fortran
1853   storage by columns.
1854 
1855   Although local portions of the matrix are stored in column-major
1856   order, the matrix is partitioned across MPI ranks by row.
1857 
1858   The data input variable is intended primarily for Fortran programmers
1859   who wish to allocate their own matrix memory space.  Most users should
1860   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1861 
1862   The user MUST specify either the local or global matrix dimensions
1863   (possibly both).
1864 
1865 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1866 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* MATDENSE resolves to seq or mpi dense depending on the communicator size; only the
     matching preallocation routine acts, the other is a no-op (PetscTryMethod dispatch) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1877 
1878 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1879 {
1880   Mat           mat;
1881   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1882 
1883   PetscFunctionBegin;
1884   *newmat = NULL;
1885   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1886   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1887   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1888   a = (Mat_MPIDense *)mat->data;
1889 
1890   mat->factortype   = A->factortype;
1891   mat->assembled    = PETSC_TRUE;
1892   mat->preallocated = PETSC_TRUE;
1893 
1894   mat->insertmode = NOT_SET_VALUES;
1895   a->donotstash   = oldmat->donotstash;
1896 
1897   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1898   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1899 
1900   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1901 
1902   *newmat = mat;
1903   PetscFunctionReturn(PETSC_SUCCESS);
1904 }
1905 
/* Load a parallel dense matrix from a viewer; binary and (when configured) HDF5
   viewers are supported. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1931 
1932 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1933 {
1934   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1935   Mat           a, b;
1936 
1937   PetscFunctionBegin;
1938   a = matA->A;
1939   b = matB->A;
1940   PetscCall(MatEqual(a, b, flag));
1941   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1942   PetscFunctionReturn(PETSC_SUCCESS);
1943 }
1944 
1945 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1946 {
1947   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1948 
1949   PetscFunctionBegin;
1950   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1951   PetscCall(MatDestroy(&atb->atb));
1952   PetscCall(PetscFree(atb));
1953   PetscFunctionReturn(PETSC_SUCCESS);
1954 }
1955 
1956 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1957 {
1958   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1959 
1960   PetscFunctionBegin;
1961   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1962   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1963   PetscCall(PetscFree(abt));
1964   PetscFunctionReturn(PETSC_SUCCESS);
1965 }
1966 
/* Numeric phase of C = A^T * B for parallel dense matrices: each rank forms the full
   local product A_loc^T * B_loc, then the partial products are summed and distributed
   row-block by row-block with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers and the intermediate matrix were set up in the symbolic phase */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its row block in
     column-major order, accounting for the leading dimension of atbarray */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2012 
2013 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2014 {
2015   MPI_Comm               comm;
2016   PetscMPIInt            size;
2017   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
2018   Mat_TransMatMultDense *atb;
2019   PetscBool              cisdense = PETSC_FALSE;
2020   PetscInt               i;
2021   const PetscInt        *ranges;
2022 
2023   PetscFunctionBegin;
2024   MatCheckProduct(C, 4);
2025   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2026   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2027   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2028     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2029   }
2030 
2031   /* create matrix product C */
2032   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2033 #if defined(PETSC_HAVE_CUDA)
2034   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2035 #endif
2036 #if defined(PETSC_HAVE_HIP)
2037   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2038 #endif
2039   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2040   PetscCall(MatSetUp(C));
2041 
2042   /* create data structure for reuse C */
2043   PetscCallMPI(MPI_Comm_size(comm, &size));
2044   PetscCall(PetscNew(&atb));
2045   cM = C->rmap->N;
2046   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2047   PetscCall(MatGetOwnershipRanges(C, &ranges));
2048   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2049 
2050   C->product->data    = atb;
2051   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
/* Symbolic phase of C = A * B^T for parallel dense matrices: set up C and allocate the
   communication buffers for the selected algorithm ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = allgatherv, alg 1 = cyclic (pairwise send/receive of row blocks) */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic algorithm's messages separate from other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B that any rank owns */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local rows of B, buf[1] the fully gathered B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    /* displacements/counts in scalars; A->cmap->N == B->cmap->N by the check above */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2106 
/*
  Numeric phase of C = A * B^T, "cyclic" algorithm.

  Each process owns a row block of B. The (compacted) local block travels
  around the ring of processes -- each rank sends to rank-1 and receives from
  rank+1 -- and at every step the block currently held is multiplied against
  the local rows of A and written into the column block of the local part of C
  that corresponds to the block's owner. Communication (Isend/Irecv) is
  overlapped with the local GEMM; two buffers alternate so the block being
  multiplied is never overwritten by an incoming receive.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local dense arrays, cast for BLAS */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is stored contiguously (lda equals local row count): use its array directly as the first send buffer */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact local B column by column (strip the lda padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next one */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank; /* rank that owns the B block currently held in sendbuf */
  for (i = 0; i < size; i++) {
    /* previous sends/receives have completed, so both buffers may be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      /* alternate buf[0]/buf[1] so the incoming block never overwrites the one being multiplied */
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T: fills the columns of C owned by recvisfrom (offset clda*ranges[recvisfrom]) */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  /* C was filled purely locally; flag it so assembly performs no communication */
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2190 
2191 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2192 {
2193   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2194   Mat_MatTransMultDense *abt;
2195   MPI_Comm               comm;
2196   PetscMPIInt            size;
2197   PetscScalar           *cv, *sendbuf, *recvbuf;
2198   const PetscScalar     *av, *bv;
2199   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2200   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2201   PetscBLASInt           cm, cn, ck, alda, clda;
2202 
2203   PetscFunctionBegin;
2204   MatCheckProduct(C, 3);
2205   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2206   abt = (Mat_MatTransMultDense *)C->product->data;
2207   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2208   PetscCallMPI(MPI_Comm_size(comm, &size));
2209   PetscCall(MatDenseGetArrayRead(a->A, &av));
2210   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2211   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2212   PetscCall(MatDenseGetLDA(a->A, &i));
2213   PetscCall(PetscBLASIntCast(i, &alda));
2214   PetscCall(MatDenseGetLDA(b->A, &blda));
2215   PetscCall(MatDenseGetLDA(c->A, &i));
2216   PetscCall(PetscBLASIntCast(i, &clda));
2217   /* copy transpose of B into buf[0] */
2218   bn      = B->rmap->n;
2219   sendbuf = abt->buf[0];
2220   recvbuf = abt->buf[1];
2221   for (k = 0, j = 0; j < bn; j++) {
2222     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2223   }
2224   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2225   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2226   PetscCall(PetscBLASIntCast(cK, &ck));
2227   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2228   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2229   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2230   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2231   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2232   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2233   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2234   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2235   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2236   PetscFunctionReturn(PETSC_SUCCESS);
2237 }
2238 
2239 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2240 {
2241   Mat_MatTransMultDense *abt;
2242 
2243   PetscFunctionBegin;
2244   MatCheckProduct(C, 3);
2245   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2246   abt = (Mat_MatTransMultDense *)C->product->data;
2247   switch (abt->alg) {
2248   case 1:
2249     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2250     break;
2251   default:
2252     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2253     break;
2254   }
2255   PetscFunctionReturn(PETSC_SUCCESS);
2256 }
2257 
2258 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2259 {
2260   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2261 
2262   PetscFunctionBegin;
2263   PetscCall(MatDestroy(&ab->Ce));
2264   PetscCall(MatDestroy(&ab->Ae));
2265   PetscCall(MatDestroy(&ab->Be));
2266   PetscCall(PetscFree(ab));
2267   PetscFunctionReturn(PETSC_SUCCESS);
2268 }
2269 
/*
  Numeric phase of C = A*B for MPI dense matrices.

  Two paths, selected by what the symbolic phase stored in the product data:
  - Elemental path (Ae and Ce were created): re-convert A and B to Elemental
    form, multiply there, and convert the result back into C in place.
  - Default "petsc" path: broadcast each column of B through A's communication
    star forest (Mvctx) into the sequential matrix Be, then perform the purely
    local product A_loc * Be into the local part of C.
*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* replicate column i of B into column i of Be via the SF broadcast
       (NOTE(review): assumes Mvctx maps B's distributed rows onto the local
       Be rows -- see MatSetUpMultiply_MPIDense for the SF construction) */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    /* purely local product of the sequential pieces: C_loc = A_loc * Be */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)(A->data))->A, ab->Be, ((Mat_MPIDense *)(C->data))->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2307 
2308 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2309 {
2310   Mat_Product      *product = C->product;
2311   PetscInt          alg;
2312   Mat_MatMultDense *ab;
2313   PetscBool         flg;
2314 
2315   PetscFunctionBegin;
2316   MatCheckProduct(C, 4);
2317   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2318   /* check local size of A and B */
2319   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2320              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2321 
2322   PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2323   alg = flg ? 0 : 1;
2324 
2325   /* setup C */
2326   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2327   PetscCall(MatSetType(C, MATMPIDENSE));
2328   PetscCall(MatSetUp(C));
2329 
2330   /* create data structure for reuse Cdense */
2331   PetscCall(PetscNew(&ab));
2332 
2333   switch (alg) {
2334   case 1: /* alg: "elemental" */
2335 #if PetscDefined(HAVE_ELEMENTAL)
2336     /* create elemental matrices Ae and Be */
2337     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2338     PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2339     PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2340     PetscCall(MatSetUp(ab->Ae));
2341     PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2342 
2343     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2344     PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2345     PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2346     PetscCall(MatSetUp(ab->Be));
2347     PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2348 
2349     /* compute symbolic Ce = Ae*Be */
2350     PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2351     PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2352 #else
2353     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2354 #endif
2355     break;
2356   default: /* alg: "petsc" */
2357     ab->Ae = NULL;
2358     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2359     ab->Ce = NULL;
2360     break;
2361   }
2362 
2363   C->product->data       = ab;
2364   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2365   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2366   PetscFunctionReturn(PETSC_SUCCESS);
2367 }
2368 
2369 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2370 {
2371   Mat_Product *product     = C->product;
2372   const char  *algTypes[2] = {"petsc", "elemental"};
2373   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2374   PetscBool    flg = PETSC_FALSE;
2375   PetscFunctionBegin;
2376 
2377   /* Set default algorithm */
2378   alg = 0; /* default is petsc */
2379   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2380   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2381 
2382   /* Get runtime option */
2383   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2384   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2385   PetscOptionsEnd();
2386   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2387 
2388   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2389   C->ops->productsymbolic = MatProductSymbolic_AB;
2390   PetscFunctionReturn(PETSC_SUCCESS);
2391 }
2392 
2393 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2394 {
2395   Mat_Product *product = C->product;
2396   Mat          A = product->A, B = product->B;
2397 
2398   PetscFunctionBegin;
2399   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2400              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2401   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2402   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2403   PetscFunctionReturn(PETSC_SUCCESS);
2404 }
2405 
2406 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2407 {
2408   Mat_Product *product     = C->product;
2409   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2410   PetscInt     alg, nalg = 2;
2411   PetscBool    flg = PETSC_FALSE;
2412 
2413   PetscFunctionBegin;
2414   /* Set default algorithm */
2415   alg = 0; /* default is allgatherv */
2416   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2417   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2418 
2419   /* Get runtime option */
2420   if (product->api_user) {
2421     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2422     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2423     PetscOptionsEnd();
2424   } else {
2425     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2426     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2427     PetscOptionsEnd();
2428   }
2429   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2430 
2431   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2432   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2433   PetscFunctionReturn(PETSC_SUCCESS);
2434 }
2435 
2436 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2437 {
2438   Mat_Product *product = C->product;
2439 
2440   PetscFunctionBegin;
2441   switch (product->type) {
2442   case MATPRODUCT_AB:
2443     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2444     break;
2445   case MATPRODUCT_AtB:
2446     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2447     break;
2448   case MATPRODUCT_ABt:
2449     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2450     break;
2451   default:
2452     break;
2453   }
2454   PetscFunctionReturn(PETSC_SUCCESS);
2455 }
2456