xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 7c441f3aff93c611491d4ea0564d57010b1fd4e9)
1 
/*
   Basic functions for parallel dense matrices.
*/
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
12               matrix that represents the operator. For sequential matrices it returns itself.
13 
14     Input Parameter:
15 .      A - the sequential or MPI dense matrix
16 
17     Output Parameter:
18 .      B - the inner matrix
19 
20     Level: intermediate
21 
22 .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
25 {
26   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
27   PetscBool     flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
31   PetscValidPointer(B, 2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
36     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
45   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A, Bmat->A, s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
53 {
54   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
55   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
56   PetscScalar  *v;
57 
58   PetscFunctionBegin;
59   PetscCall(MatDenseGetArray(mat->A, &v));
60   PetscCall(MatDenseGetLDA(mat->A, &lda));
61   rend2 = PetscMin(rend, A->cmap->N);
62   if (rend2 > rstart) {
63     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
64     PetscCall(PetscLogFlops(rend2 - rstart));
65   }
66   PetscCall(MatDenseRestoreArray(mat->A, &v));
67   PetscFunctionReturn(0);
68 }
69 
70 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
71 {
72   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
73   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
74 
75   PetscFunctionBegin;
76   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
77   lrow = row - rstart;
78   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
83 {
84   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
85   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
86 
87   PetscFunctionBegin;
88   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
89   lrow = row - rstart;
90   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
91   PetscFunctionReturn(0);
92 }
93 
/* Return (and cache on A) a sequential matrix that aliases the locally owned
   diagonal block of A.  The block is created on first use, composed with A under
   the key "DiagonalBlock" (so it is reference-counted and destroyed with A), and
   the caller receives a borrowed reference in *a. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  /* only congruent (square) layouts have a well-defined m x m diagonal block */
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B shares storage with A's local block, starting at column rstart.
       NOTE(review): the offset m*rstart assumes the local block's leading
       dimension equals m -- confirm for matrices where MatDenseSetLDA was used */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* drop the local reference; the composition above keeps B alive */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
124 
/* Insert/add values into the parallel dense matrix.  Locally owned rows go
   straight into the sequential local block; rows owned by other processes are
   queued in the stash for communication during assembly (unless stashing is
   disabled via donotstash). */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* convert to local row index */
      if (roworiented) {
        /* v is row-major: row i of the input occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: entry (i,j) of the input is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are ignored */
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it for MatAssemblyBegin/End to communicate */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
157 
158 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
159 {
160   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
161   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
162 
163   PetscFunctionBegin;
164   for (i = 0; i < m; i++) {
165     if (idxm[i] < 0) continue; /* negative row */
166     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
167     if (idxm[i] >= rstart && idxm[i] < rend) {
168       row = idxm[i] - rstart;
169       for (j = 0; j < n; j++) {
170         if (idxn[j] < 0) continue; /* negative column */
171         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
172         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
173       }
174     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
180 {
181   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
182 
183   PetscFunctionBegin;
184   PetscCall(MatDenseGetLDA(a->A, lda));
185   PetscFunctionReturn(0);
186 }
187 
188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
189 {
190   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
191   PetscBool     iscuda;
192 
193   PetscFunctionBegin;
194   if (!a->A) {
195     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
196     PetscCall(PetscLayoutSetUp(A->rmap));
197     PetscCall(PetscLayoutSetUp(A->cmap));
198     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
199     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
200     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
201     PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
202   }
203   PetscCall(MatDenseSetLDA(a->A, lda));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
208 {
209   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
210 
211   PetscFunctionBegin;
212   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
213   PetscCall(MatDenseGetArray(a->A, array));
214   PetscFunctionReturn(0);
215 }
216 
217 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
218 {
219   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
220 
221   PetscFunctionBegin;
222   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
223   PetscCall(MatDenseGetArrayRead(a->A, array));
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
228 {
229   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
230 
231   PetscFunctionBegin;
232   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
233   PetscCall(MatDenseGetArrayWrite(a->A, array));
234   PetscFunctionReturn(0);
235 }
236 
237 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
238 {
239   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
240 
241   PetscFunctionBegin;
242   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
243   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
244   PetscCall(MatDensePlaceArray(a->A, array));
245   PetscFunctionReturn(0);
246 }
247 
248 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
249 {
250   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
251 
252   PetscFunctionBegin;
253   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
254   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
255   PetscCall(MatDenseResetArray(a->A));
256   PetscFunctionReturn(0);
257 }
258 
259 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
260 {
261   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
262 
263   PetscFunctionBegin;
264   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
265   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
266   PetscCall(MatDenseReplaceArray(a->A, array));
267   PetscFunctionReturn(0);
268 }
269 
/* Extract the submatrix A(isrow, iscol) into *B.  The column index set is
   gathered onto every process (iscol_local) since each process stores all
   global columns of its row block; rows must already be owned locally (no
   parallel redistribution is performed). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full (global) list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): the sequential bv++ walk assumes the target local block is
     stored contiguously (leading dimension == nrows); that holds for the
     freshly preallocated matrix above -- confirm for MAT_REUSE_MATRIX. */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at the start of source column icol[i]; pick out the selected rows */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}
336 
337 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
338 {
339   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
340 
341   PetscFunctionBegin;
342   PetscCall(MatDenseRestoreArray(a->A, array));
343   PetscFunctionReturn(0);
344 }
345 
346 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
347 {
348   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
349 
350   PetscFunctionBegin;
351   PetscCall(MatDenseRestoreArrayRead(a->A, array));
352   PetscFunctionReturn(0);
353 }
354 
355 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
356 {
357   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
358 
359   PetscFunctionBegin;
360   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
361   PetscFunctionReturn(0);
362 }
363 
364 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
365 {
366   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
367   PetscInt      nstash, reallocs;
368 
369   PetscFunctionBegin;
370   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
371 
372   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
373   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
374   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
375   PetscFunctionReturn(0);
376 }
377 
/* Finish assembly: drain the stash of off-process values received from other
   ranks, insert them locally, then assemble the sequential local block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg becomes 0 once every incoming message has been handed out */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j; /* continue with the next run of rows */
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}
411 
412 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
413 {
414   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
415 
416   PetscFunctionBegin;
417   PetscCall(MatZeroEntries(l->A));
418   PetscFunctionReturn(0);
419 }
420 
/* Zero the listed rows (global indices).  If x and b are given, b is fixed so
   that b[r] = diag*x[r] for each zeroed locally owned row.  A nonzero diag is
   reinserted on the diagonal afterwards via MatDiagonalSet. */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block; the diagonal is handled separately below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): VecSet + MatDiagonalSet writes diag onto the ENTIRE
       diagonal, not only the zeroed rows -- confirm callers expect this */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
452 
453 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
454 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
455 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
456 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
457 
/* y = A*x: broadcast the needed entries of x into the work vector lvec through
   the communication SF, then multiply with the sequential local block. */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the scatter SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* memtype-aware broadcast handles host and device arrays alike */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}
476 
/* z = y + A*x: same communication pattern as MatMult_MPIDense, finishing with
   the local block's multadd. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the scatter SF and lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}
495 
/* y = A^T*x: apply the local block's transpose multiply into lvec, then
   reduce-add (sum) the partial results into y through the SF. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the scatter SF and lvec */
  PetscCall(VecSet(yy, 0.0));                             /* the reduction below adds into yy */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}
515 
/* z = y + A^T*x: copy y into z, then reduce-add the local transpose-multiply
   results into z through the SF. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the scatter SF and lvec */
  PetscCall(VecCopy(yy, zz));                             /* the reduction below adds into zz */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}
535 
536 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
537 {
538   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
539   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
540   PetscScalar       *x, zero = 0.0;
541   const PetscScalar *av;
542 
543   PetscFunctionBegin;
544   PetscCall(VecSet(v, zero));
545   PetscCall(VecGetArray(v, &x));
546   PetscCall(VecGetSize(v, &n));
547   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
548   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
549   radd = A->rmap->rstart * m;
550   PetscCall(MatDenseGetArrayRead(a->A, &av));
551   PetscCall(MatDenseGetLDA(a->A, &lda));
552   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
553   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
554   PetscCall(VecRestoreArray(v, &x));
555   PetscFunctionReturn(0);
556 }
557 
/* Destroy the MPIDense matrix: release all owned sub-objects, free the
   implementation data, and unregister every composed method so the object can
   be safely re-typed or finalized. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying while a borrowed column vector / submatrix is outstanding would leave dangling references */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* owned sub-objects: local block, scatter work vector, SF, cached column vec/mat */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* unregister every composed method installed at creation/convert time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block, if MatGetDiagonalBlock_MPIDense created one */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}
633 
634 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
635 
636 #include <petscdraw.h>
/* Common viewer for ASCII, draw, and socket viewers.  The info formats print a
   per-rank summary and return early; otherwise the whole matrix is gathered
   onto rank 0 (as a temporary MATMPIDENSE) and viewed there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank size/memory summary only; no matrix entries */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all rows of the gathered copy; every other rank owns none */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* rank 0's local block now holds the entire matrix; view it sequentially */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
718 
719 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
720 {
721   PetscBool iascii, isbinary, isdraw, issocket;
722 
723   PetscFunctionBegin;
724   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
725   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
726   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
727   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
728 
729   if (iascii || issocket || isdraw) {
730     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
731   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
732   PetscFunctionReturn(0);
733 }
734 
735 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
736 {
737   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
738   Mat            mdn = mat->A;
739   PetscLogDouble isend[5], irecv[5];
740 
741   PetscFunctionBegin;
742   info->block_size = 1.0;
743 
744   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
745 
746   isend[0] = info->nz_used;
747   isend[1] = info->nz_allocated;
748   isend[2] = info->nz_unneeded;
749   isend[3] = info->memory;
750   isend[4] = info->mallocs;
751   if (flag == MAT_LOCAL) {
752     info->nz_used      = isend[0];
753     info->nz_allocated = isend[1];
754     info->nz_unneeded  = isend[2];
755     info->memory       = isend[3];
756     info->mallocs      = isend[4];
757   } else if (flag == MAT_GLOBAL_MAX) {
758     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
759 
760     info->nz_used      = irecv[0];
761     info->nz_allocated = irecv[1];
762     info->nz_unneeded  = irecv[2];
763     info->memory       = irecv[3];
764     info->mallocs      = irecv[4];
765   } else if (flag == MAT_GLOBAL_SUM) {
766     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
767 
768     info->nz_used      = irecv[0];
769     info->nz_allocated = irecv[1];
770     info->nz_unneeded  = irecv[2];
771     info->memory       = irecv[3];
772     info->mallocs      = irecv[4];
773   }
774   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
775   info->fill_ratio_needed = 0;
776   info->factor_mallocs    = 0;
777   PetscFunctionReturn(0);
778 }
779 
/* Set an option on a parallel dense matrix.

   Structural options are forwarded to the local sequential block; options
   that have no meaning for a dense format are logged and ignored; unknown
   options raise PETSC_ERR_SUP. */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-location options only make sense once storage exists, so
     require preallocation, then delegate to the local block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* recorded locally (affects MatSetValues interpretation) and also
       forwarded so the inner matrix stays in sync */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* irrelevant for dense storage: every entry is always present */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process values passed to MatSetValues are dropped
       instead of stashed for communication during assembly */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
823 
/* Scale A on the left by diagonal vector ll and/or on the right by rr:
   A = diag(ll) * A * diag(rr). Either vector may be NULL to skip that side.

   ll is distributed like the rows of A, so each process scales its local
   rows directly. rr is distributed like the columns of A, but every process
   stores all N columns of its row slice, so rr must first be gathered into
   the local work vector lvec via the communication star-forest (Mvctx). */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      /* row i lives at stride lda in the column-major local array */
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the SF that replicates the distributed rr locally */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      /* column i is contiguous, starting at offset i*lda */
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}
872 
873 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
874 {
875   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
876   PetscInt           i, j;
877   PetscMPIInt        size;
878   PetscReal          sum = 0.0;
879   const PetscScalar *av, *v;
880 
881   PetscFunctionBegin;
882   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
883   v = av;
884   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
885   if (size == 1) {
886     PetscCall(MatNorm(mdn->A, type, nrm));
887   } else {
888     if (type == NORM_FROBENIUS) {
889       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
890         sum += PetscRealPart(PetscConj(*v) * (*v));
891         v++;
892       }
893       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
894       *nrm = PetscSqrtReal(*nrm);
895       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
896     } else if (type == NORM_1) {
897       PetscReal *tmp, *tmp2;
898       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
899       *nrm = 0.0;
900       v    = av;
901       for (j = 0; j < mdn->A->cmap->n; j++) {
902         for (i = 0; i < mdn->A->rmap->n; i++) {
903           tmp[j] += PetscAbsScalar(*v);
904           v++;
905         }
906       }
907       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
908       for (j = 0; j < A->cmap->N; j++) {
909         if (tmp2[j] > *nrm) *nrm = tmp2[j];
910       }
911       PetscCall(PetscFree2(tmp, tmp2));
912       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
913     } else if (type == NORM_INFINITY) { /* max row norm */
914       PetscReal ntemp;
915       PetscCall(MatNorm(mdn->A, type, &ntemp));
916       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
917     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
918   }
919   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
920   PetscFunctionReturn(0);
921 }
922 
/* Form the transpose of a parallel dense matrix.

   For MAT_INITIAL_MATRIX / MAT_INPLACE_MATRIX a new matrix with swapped
   layout is created; for MAT_REUSE_MATRIX *matout is filled in place.
   The data movement goes through MatSetValues: each local column of A
   becomes one global row of B, so entries are communicated during
   assembly. For MAT_INPLACE_MATRIX the result is merged back into A's
   header via MatHeaderMerge. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B has A's layout with row/column roles exchanged */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of the locally owned rows of A,
     which become the column indices of B */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of the local block (contiguous, length m) is row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B, destroying B's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
961 
962 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
963 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
964 
965 PetscErrorCode MatSetUp_MPIDense(Mat A)
966 {
967   PetscFunctionBegin;
968   PetscCall(PetscLayoutSetUp(A->rmap));
969   PetscCall(PetscLayoutSetUp(A->cmap));
970   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
971   PetscFunctionReturn(0);
972 }
973 
974 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
975 {
976   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
977 
978   PetscFunctionBegin;
979   PetscCall(MatAXPY(A->A, alpha, B->A, str));
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatConjugate_MPIDense(Mat mat)
984 {
985   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
986 
987   PetscFunctionBegin;
988   PetscCall(MatConjugate(a->A));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatRealPart_MPIDense(Mat A)
993 {
994   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
995 
996   PetscFunctionBegin;
997   PetscCall(MatRealPart(a->A));
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1002 {
1003   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1004 
1005   PetscFunctionBegin;
1006   PetscCall(MatImaginaryPart(a->A));
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1011 {
1012   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1013 
1014   PetscFunctionBegin;
1015   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1016   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1017   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1022 
/* Compute per-column reductions (norms, sums, means) of a parallel dense
   matrix. `type` is a NormType or ReductionType value (the two enums use
   disjoint integer ranges, hence the PetscInt parameter).

   Strategy: each process reduces its local row slice of every column with
   the sequential kernel, then the partial results are combined across the
   communicator (MPI_MAX for the infinity norm, MPI_SUM otherwise). */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    /* mean = sum/m, so compute sums locally and divide after the reduce */
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* local 2-norms must be squared so that summing them across processes
       yields the squared global norm */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* undo the squaring after the global sum */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
1055 
1056 #if defined(PETSC_HAVE_CUDA)
/* A = A + alpha*I, performed on the GPU.

   The device kernel only needs the local array, its leading dimension and
   the owned row range [rstart, rend) to locate the diagonal entries that
   live on this process. */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}
1070 
/* Expose global column `col` as a CUDA vector for read-write access.

   A single cached vector (a->cvec) with no backing storage is created on
   first use; each call points it at the column's device memory (offset
   col*lda into the local array). Only one column vector and one submatrix
   may be outstanding at a time, tracked via vecinuse/matinuse. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  /* store col+1 so 0 can mean "not in use" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1087 
/* Release the column vector handed out by MatDenseGetColumnVec: return
   the device array to the local matrix, detach the cached vector from the
   column's memory, and clear the in-use marker. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1101 
/* Read-only variant of MatDenseGetColumnVec: same cached-vector placement,
   but the device array is acquired with read access (no host/device dirty
   marking) and the vector is read-locked before being returned. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  /* store col+1 so 0 can mean "not in use" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1119 
/* Release a read-only column vector: restore the read-only device array,
   pop the read lock, detach the cached vector, clear the in-use marker. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1134 
/* Write-only variant of MatDenseGetColumnVec: acquires the device array
   with write access (previous contents need not be valid on the device)
   and points the cached vector at column `col`. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  /* store col+1 so 0 can mean "not in use" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1151 
/* Release a write-only column vector: restore the write-access device
   array, detach the cached vector, clear the in-use marker. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}
1165 
1166 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1167 {
1168   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1172   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1173   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1178 {
1179   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1180 
1181   PetscFunctionBegin;
1182   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1183   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1184   PetscCall(MatDenseCUDAResetArray(l->A));
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1189 {
1190   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1191 
1192   PetscFunctionBegin;
1193   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1194   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1195   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1200 {
1201   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1202 
1203   PetscFunctionBegin;
1204   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1211 
1212   PetscFunctionBegin;
1213   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1218 {
1219   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1220 
1221   PetscFunctionBegin;
1222   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1227 {
1228   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1229 
1230   PetscFunctionBegin;
1231   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1236 {
1237   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1238 
1239   PetscFunctionBegin;
1240   PetscCall(MatDenseCUDAGetArray(l->A, a));
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1245 {
1246   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1247 
1248   PetscFunctionBegin;
1249   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1254 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1255 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1256 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1257 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1258 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1259 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1260 
/* Bind a CUDA parallel dense matrix to the CPU (bind=PETSC_TRUE) or allow
   it back on the GPU (bind=PETSC_FALSE).

   Besides binding the inner sequential block, this swaps the composed
   column-vector accessors and the shift op between the host and device
   implementations, switches the default random type, and when moving to
   the GPU destroys any cached column vector / submatrix that is not of
   the CUDA type so it will be recreated with the right type on demand. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    /* drop cached views of the wrong (host) type so GPU-typed ones are
       created lazily on next use */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  /* the cached submatrix view, if any, follows the parent's binding */
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}
1300 
/* Preallocate storage for a MATMPIDENSECUDA matrix.

   d_data may be a user-supplied device array (of size at least
   local-rows * global-cols) or NULL to let PETSc allocate. The call is a
   no-op for any other matrix type. The inner sequential block spans all
   global columns over this process's row range. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* local block: rmap->n rows by all cmap->N columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1322 #endif
1323 
1324 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1325 {
1326   Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1327 
1328   PetscFunctionBegin;
1329   PetscCall(MatSetRandom(d->A, rctx));
1330 #if defined(PETSC_HAVE_DEVICE)
1331   x->offloadmask = d->A->offloadmask;
1332 #endif
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1337 {
1338   PetscFunctionBegin;
1339   *missing = PETSC_FALSE;
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1344 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1345 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1346 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1347 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1348 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1349 
1350 /* -------------------------------------------------------------------*/
1351 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1352                                        MatGetRow_MPIDense,
1353                                        MatRestoreRow_MPIDense,
1354                                        MatMult_MPIDense,
1355                                        /*  4*/ MatMultAdd_MPIDense,
1356                                        MatMultTranspose_MPIDense,
1357                                        MatMultTransposeAdd_MPIDense,
1358                                        NULL,
1359                                        NULL,
1360                                        NULL,
1361                                        /* 10*/ NULL,
1362                                        NULL,
1363                                        NULL,
1364                                        NULL,
1365                                        MatTranspose_MPIDense,
1366                                        /* 15*/ MatGetInfo_MPIDense,
1367                                        MatEqual_MPIDense,
1368                                        MatGetDiagonal_MPIDense,
1369                                        MatDiagonalScale_MPIDense,
1370                                        MatNorm_MPIDense,
1371                                        /* 20*/ MatAssemblyBegin_MPIDense,
1372                                        MatAssemblyEnd_MPIDense,
1373                                        MatSetOption_MPIDense,
1374                                        MatZeroEntries_MPIDense,
1375                                        /* 24*/ MatZeroRows_MPIDense,
1376                                        NULL,
1377                                        NULL,
1378                                        NULL,
1379                                        NULL,
1380                                        /* 29*/ MatSetUp_MPIDense,
1381                                        NULL,
1382                                        NULL,
1383                                        MatGetDiagonalBlock_MPIDense,
1384                                        NULL,
1385                                        /* 34*/ MatDuplicate_MPIDense,
1386                                        NULL,
1387                                        NULL,
1388                                        NULL,
1389                                        NULL,
1390                                        /* 39*/ MatAXPY_MPIDense,
1391                                        MatCreateSubMatrices_MPIDense,
1392                                        NULL,
1393                                        MatGetValues_MPIDense,
1394                                        MatCopy_MPIDense,
1395                                        /* 44*/ NULL,
1396                                        MatScale_MPIDense,
1397                                        MatShift_MPIDense,
1398                                        NULL,
1399                                        NULL,
1400                                        /* 49*/ MatSetRandom_MPIDense,
1401                                        NULL,
1402                                        NULL,
1403                                        NULL,
1404                                        NULL,
1405                                        /* 54*/ NULL,
1406                                        NULL,
1407                                        NULL,
1408                                        NULL,
1409                                        NULL,
1410                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1411                                        MatDestroy_MPIDense,
1412                                        MatView_MPIDense,
1413                                        NULL,
1414                                        NULL,
1415                                        /* 64*/ NULL,
1416                                        NULL,
1417                                        NULL,
1418                                        NULL,
1419                                        NULL,
1420                                        /* 69*/ NULL,
1421                                        NULL,
1422                                        NULL,
1423                                        NULL,
1424                                        NULL,
1425                                        /* 74*/ NULL,
1426                                        NULL,
1427                                        NULL,
1428                                        NULL,
1429                                        NULL,
1430                                        /* 79*/ NULL,
1431                                        NULL,
1432                                        NULL,
1433                                        NULL,
1434                                        /* 83*/ MatLoad_MPIDense,
1435                                        NULL,
1436                                        NULL,
1437                                        NULL,
1438                                        NULL,
1439                                        NULL,
1440                                        /* 89*/ NULL,
1441                                        NULL,
1442                                        NULL,
1443                                        NULL,
1444                                        NULL,
1445                                        /* 94*/ NULL,
1446                                        NULL,
1447                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1448                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1449                                        NULL,
1450                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1451                                        NULL,
1452                                        NULL,
1453                                        MatConjugate_MPIDense,
1454                                        NULL,
1455                                        /*104*/ NULL,
1456                                        MatRealPart_MPIDense,
1457                                        MatImaginaryPart_MPIDense,
1458                                        NULL,
1459                                        NULL,
1460                                        /*109*/ NULL,
1461                                        NULL,
1462                                        NULL,
1463                                        MatGetColumnVector_MPIDense,
1464                                        MatMissingDiagonal_MPIDense,
1465                                        /*114*/ NULL,
1466                                        NULL,
1467                                        NULL,
1468                                        NULL,
1469                                        NULL,
1470                                        /*119*/ NULL,
1471                                        NULL,
1472                                        NULL,
1473                                        NULL,
1474                                        NULL,
1475                                        /*124*/ NULL,
1476                                        MatGetColumnReductions_MPIDense,
1477                                        NULL,
1478                                        NULL,
1479                                        NULL,
1480                                        /*129*/ NULL,
1481                                        NULL,
1482                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1483                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1484                                        NULL,
1485                                        /*134*/ NULL,
1486                                        NULL,
1487                                        NULL,
1488                                        NULL,
1489                                        NULL,
1490                                        /*139*/ NULL,
1491                                        NULL,
1492                                        NULL,
1493                                        NULL,
1494                                        NULL,
1495                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1496                                        /*145*/ NULL,
1497                                        NULL,
1498                                        NULL,
1499                                        NULL,
1500                                        NULL,
1501                                        /*150*/ NULL};
1502 
1503 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1504 {
1505   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1506   PetscBool     iscuda;
1507 
1508   PetscFunctionBegin;
1509   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1510   PetscCall(PetscLayoutSetUp(mat->rmap));
1511   PetscCall(PetscLayoutSetUp(mat->cmap));
1512   if (!a->A) {
1513     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1514     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1515   }
1516   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1517   PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1518   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1519 #if defined(PETSC_HAVE_DEVICE)
1520   mat->offloadmask = a->A->offloadmask;
1521 #endif
1522   mat->preallocated = PETSC_TRUE;
1523   mat->assembled    = PETSC_TRUE;
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1528 {
1529   Mat B, C;
1530 
1531   PetscFunctionBegin;
1532   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1533   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1534   PetscCall(MatDestroy(&C));
1535   if (reuse == MAT_REUSE_MATRIX) {
1536     C = *newmat;
1537   } else C = NULL;
1538   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1539   PetscCall(MatDestroy(&B));
1540   if (reuse == MAT_INPLACE_MATRIX) {
1541     PetscCall(MatHeaderReplace(A, &C));
1542   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1547 {
1548   Mat B, C;
1549 
1550   PetscFunctionBegin;
1551   PetscCall(MatDenseGetLocalMatrix(A, &C));
1552   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1553   if (reuse == MAT_REUSE_MATRIX) {
1554     C = *newmat;
1555   } else C = NULL;
1556   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1557   PetscCall(MatDestroy(&B));
1558   if (reuse == MAT_INPLACE_MATRIX) {
1559     PetscCall(MatHeaderReplace(A, &C));
1560   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #if defined(PETSC_HAVE_ELEMENTAL)
1565 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1566 {
1567   Mat          mat_elemental;
1568   PetscScalar *v;
1569   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1570 
1571   PetscFunctionBegin;
1572   if (reuse == MAT_REUSE_MATRIX) {
1573     mat_elemental = *newmat;
1574     PetscCall(MatZeroEntries(*newmat));
1575   } else {
1576     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1577     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1578     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1579     PetscCall(MatSetUp(mat_elemental));
1580     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1581   }
1582 
1583   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1584   for (i = 0; i < N; i++) cols[i] = i;
1585   for (i = 0; i < m; i++) rows[i] = rstart + i;
1586 
1587   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1588   PetscCall(MatDenseGetArray(A, &v));
1589   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1590   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1592   PetscCall(MatDenseRestoreArray(A, &v));
1593   PetscCall(PetscFree2(rows, cols));
1594 
1595   if (reuse == MAT_INPLACE_MATRIX) {
1596     PetscCall(MatHeaderReplace(A, &mat_elemental));
1597   } else {
1598     *newmat = mat_elemental;
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 #endif
1603 
1604 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1605 {
1606   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1607 
1608   PetscFunctionBegin;
1609   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1614 {
1615   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1616 
1617   PetscFunctionBegin;
1618   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1623 {
1624   Mat_MPIDense *mat;
1625   PetscInt      m, nloc, N;
1626 
1627   PetscFunctionBegin;
1628   PetscCall(MatGetSize(inmat, &m, &N));
1629   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1630   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1631     PetscInt sum;
1632 
1633     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1634     /* Check sum(n) = N */
1635     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1636     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1637 
1638     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1639     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1640   }
1641 
1642   /* numeric phase */
1643   mat = (Mat_MPIDense *)(*outmat)->data;
1644   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 #if defined(PETSC_HAVE_CUDA)
1649 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1650 {
1651   Mat           B;
1652   Mat_MPIDense *m;
1653 
1654   PetscFunctionBegin;
1655   if (reuse == MAT_INITIAL_MATRIX) {
1656     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1657   } else if (reuse == MAT_REUSE_MATRIX) {
1658     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1659   }
1660 
1661   B = *newmat;
1662   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
1663   PetscCall(PetscFree(B->defaultvectype));
1664   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
1665   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
1666   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
1667   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
1669   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
1670   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
1671   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
1672   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
1673   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
1674   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
1675   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
1676   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
1677   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
1678   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
1679   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
1680   m = (Mat_MPIDense *)(B)->data;
1681   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
1682   B->ops->bindtocpu = NULL;
1683   B->offloadmask    = PETSC_OFFLOAD_CPU;
1684   PetscFunctionReturn(0);
1685 }
1686 
/* Converts a host MATMPIDENSE matrix into a MATMPIDENSECUDA one: depending on reuse the
   object is duplicated, copied into, or retyped in place; the CUDA-specific methods are
   then registered by name and the inner sequential matrix is converted to its CUDA type. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }
  /* NOTE(review): for MAT_INPLACE_MATRIX, *newmat presumably already aliases M — confirm against callers */

  B = *newmat;
  /* CUDA vectors become the default compatible vector type for the converted matrix */
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
  /* register the CUDA-specific implementations that MatConvert/MatProduct/MatDenseCUDA* look up by name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH; /* host values were valid and are now mirrored on the device */
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; /* no storage yet; preallocation will decide */
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1729 #endif
1730 
1731 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1732 {
1733   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1734   PetscInt      lda;
1735 
1736   PetscFunctionBegin;
1737   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1738   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1739   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1740   a->vecinuse = col + 1;
1741   PetscCall(MatDenseGetLDA(a->A, &lda));
1742   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1743   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1744   *v = a->cvec;
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1749 {
1750   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1751 
1752   PetscFunctionBegin;
1753   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1754   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1755   a->vecinuse = 0;
1756   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1757   PetscCall(VecResetArray(a->cvec));
1758   if (v) *v = NULL;
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1763 {
1764   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1765   PetscInt      lda;
1766 
1767   PetscFunctionBegin;
1768   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1769   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1770   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1771   a->vecinuse = col + 1;
1772   PetscCall(MatDenseGetLDA(a->A, &lda));
1773   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1774   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1775   PetscCall(VecLockReadPush(a->cvec));
1776   *v = a->cvec;
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1781 {
1782   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1783 
1784   PetscFunctionBegin;
1785   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1786   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1787   a->vecinuse = 0;
1788   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1789   PetscCall(VecLockReadPop(a->cvec));
1790   PetscCall(VecResetArray(a->cvec));
1791   if (v) *v = NULL;
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1796 {
1797   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1798   PetscInt      lda;
1799 
1800   PetscFunctionBegin;
1801   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1802   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1803   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1804   a->vecinuse = col + 1;
1805   PetscCall(MatDenseGetLDA(a->A, &lda));
1806   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1807   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1808   *v = a->cvec;
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1813 {
1814   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1815 
1816   PetscFunctionBegin;
1817   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1818   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1819   a->vecinuse = 0;
1820   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1821   PetscCall(VecResetArray(a->cvec));
1822   if (v) *v = NULL;
1823   PetscFunctionReturn(0);
1824 }
1825 
/* Exposes the (rbegin:rend, cbegin:cend) block of A as a matrix of the same parallel
   type without copying data. The sub-matrix shell a->cmat is cached and reused across
   calls; only one sub-matrix may be checked out at a time (tracked via a->matinuse). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend; /* this process's portion of the requested row range, in local indices */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the global row range [rbegin,rend) against this process's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached shell with A's type and build its layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* when all rows are requested, share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse the cached shell; rebuild its layouts only if the requested sizes changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* matching global size is not enough: the local split must agree on every process */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* borrow the corresponding block of the inner sequential matrix as the shell's local part */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* stored one-based so 0 can mean "not checked out" */
  *v          = a->cmat;
  PetscFunctionReturn(0);
}
1883 
1884 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1885 {
1886   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1887   Mat_MPIDense *c;
1888 
1889   PetscFunctionBegin;
1890   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1891   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1892   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1893   a->matinuse = 0;
1894   c           = (Mat_MPIDense *)a->cmat->data;
1895   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1896   if (v) *v = NULL;
1897 #if defined(PETSC_HAVE_DEVICE)
1898   A->offloadmask = a->A->offloadmask;
1899 #endif
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 /*MC
1904    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1905 
1906    Options Database Keys:
1907 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1908 
1909   Level: beginner
1910 
1911 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1912 M*/
/* Type constructor for MATMPIDENSE: installs the function table, creates the value
   stash for off-process MatSetValues(), and composes all name-keyed methods that
   other components (MatConvert, MatProduct, MatDense accessors) look up at runtime. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access: LDA, whole-array, and placed-array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense))
;
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and sub-matrix checkout/restore pairs */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  /* preallocation and matrix-product dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}
1977 
1978 /*MC
1979    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1980 
1981    Options Database Keys:
1982 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
1983 
1984   Level: beginner
1985 
1986 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1987 M*/
1988 #if defined(PETSC_HAVE_CUDA)
1989   #include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSECUDA: builds a host MPIDENSE matrix and converts it
   in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  /* make sure the CUDA device is initialized before any device allocation happens */
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
1998 #endif
1999 
2000 /*MC
2001    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2002 
2003    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2004    and `MATMPIDENSE` otherwise.
2005 
2006    Options Database Keys:
2007 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
2008 
2009   Level: beginner
2010 
2011 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
2012 M*/
2013 
2014 /*MC
2015    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2016 
2017    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2018    and `MATMPIDENSECUDA` otherwise.
2019 
2020    Options Database Keys:
2021 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
2022 
2023   Level: beginner
2024 
2025 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
2026 M*/
2027 
2028 /*@C
2029    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2030 
2031    Collective
2032 
2033    Input Parameters:
+  B - the matrix
2035 -  data - optional location of matrix data.  Set data=NULL for PETSc
2036    to control all matrix memory allocation.
2037 
2038    Notes:
2039    The dense format is fully compatible with standard Fortran 77
2040    storage by columns.
2041 
2042    The data input variable is intended primarily for Fortran programmers
2043    who wish to allocate their own matrix memory space.  Most users should
2044    set data=NULL.
2045 
2046    Level: intermediate
2047 
2048 .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2049 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the type-specific implementation; silently a no-op for other matrix types */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}
2057 
2058 /*@
2059    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2060    array provided by the user. This is useful to avoid copying an array
2061    into a matrix
2062 
2063    Not Collective
2064 
2065    Input Parameters:
2066 +  mat - the matrix
2067 -  array - the array in column major order
2068 
2069    Note:
2070    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2071    freed when the matrix is destroyed.
2072 
2073    Level: developer
2074 
2075 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2076           `MatDenseReplaceArray()`
2077 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* PetscUseMethod() errors if the matrix type does not support placing an array */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the newly placed host array is now the authoritative copy of the data */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2089 
2090 /*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
2092 
2093    Not Collective
2094 
2095    Input Parameters:
2096 .  mat - the matrix
2097 
2098    Note:
2099    You can only call this after a call to `MatDensePlaceArray()`
2100 
2101    Level: developer
2102 
2103 .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2104 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* restore the array saved by a prior MatDensePlaceArray(); errors if unsupported */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2113 
2114 /*@
2115    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2116    array provided by the user. This is useful to avoid copying an array
2117    into a matrix
2118 
2119    Not Collective
2120 
2121    Input Parameters:
2122 +  mat - the matrix
2123 -  array - the array in column major order
2124 
2125    Note:
2126    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2127    freed by the user. It will be freed when the matrix is destroyed.
2128 
2129    Level: developer
2130 
2131 .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2132 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* unlike MatDensePlaceArray(), the matrix takes ownership of the new array */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the replacement host array is now the authoritative copy of the data */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2144 
2145 #if defined(PETSC_HAVE_CUDA)
2146 /*@C
2147    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2148    array provided by the user. This is useful to avoid copying an array
2149    into a matrix
2150 
2151    Not Collective
2152 
2153    Input Parameters:
2154 +  mat - the matrix
2155 -  array - the array in column major order
2156 
2157    Note:
2158    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2159    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2160 
2161    Level: developer
2162 
2163 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2164 @*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* errors if the matrix type does not support placing a device array */
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed device array is now the authoritative copy of the data */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2174 
2175 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`
2177 
2178    Not Collective
2179 
2180    Input Parameters:
2181 .  mat - the matrix
2182 
2183    Note:
2184    You can only call this after a call to `MatDenseCUDAPlaceArray()`
2185 
2186    Level: developer
2187 
2188 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2189 @*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* restore the device array saved by a prior MatDenseCUDAPlaceArray() */
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2198 
2199 /*@C
2200    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2201    array provided by the user. This is useful to avoid copying an array
2202    into a matrix
2203 
2204    Not Collective
2205 
2206    Input Parameters:
2207 +  mat - the matrix
2208 -  array - the array in column major order
2209 
2210    Note:
2211    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2212    The memory passed in CANNOT be freed by the user. It will be freed
2213    when the matrix is destroyed. The array should respect the matrix leading dimension.
2214 
2215    Level: developer
2216 
2217 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2218 @*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* permanently replaces the device array; the matrix takes ownership */
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement device array is now the authoritative copy of the data */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2228 
2229 /*@C
2230    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2231 
2232    Not Collective
2233 
2234    Input Parameters:
2235 .  A - the matrix
2236 
   Output Parameter:
2238 .  array - the GPU array in column major order
2239 
2240    Notes:
2241    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
2242 
2243    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2244 
2245    Level: developer
2246 
2247 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2248 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* the caller is expected to overwrite the values, so bump the state now */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2257 
2258 /*@C
2259    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2260 
2261    Not Collective
2262 
2263    Input Parameters:
2264 +  A - the matrix
2265 -  array - the GPU array in column major order
2266 
2267    Level: developer
2268 
2269 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2270 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the device copy was just written; mark it as the authoritative one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2280 
2281 /*@C
2282    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2283 
2284    Not Collective
2285 
2286    Input Parameters:
2287 .  A - the matrix
2288 
   Output Parameter:
2290 .  array - the GPU array in column major order
2291 
2292    Note:
2293    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2294 
2295    Level: developer
2296 
2297 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2298 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* read-only access: no PetscObjectStateIncrease(), the values are not modified */
  PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  PetscFunctionReturn(0);
}
2306 
2307 /*@C
2308    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2309 
2310    Not Collective
2311 
2312    Input Parameters:
2313 +  A - the matrix
2314 -  array - the GPU array in column major order
2315 
2316    Note:
2317    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2318 
2319    Level: developer
2320 
2321 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2322 @*/
2323 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2324 {
2325   PetscFunctionBegin;
2326   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 /*@C
2331    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2332 
2333    Not Collective
2334 
2335    Input Parameters:
2336 .  A - the matrix
2337 
   Output Parameter:
2339 .  array - the GPU array in column major order
2340 
2341    Note:
2342    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2343 
2344    Level: developer
2345 
2346 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2347 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* read/write access: assume the caller may modify the values */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2356 
2357 /*@C
2358    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2359 
2360    Not Collective
2361 
2362    Input Parameters:
2363 +  A - the matrix
2364 -  array - the GPU array in column major order
2365 
2366    Level: developer
2367 
2368 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2369 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the device copy may have been written; mark it as the authoritative one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2379 #endif
2380 
2381 /*@C
2382    MatCreateDense - Creates a matrix in `MATDENSE` format.
2383 
2384    Collective
2385 
2386    Input Parameters:
2387 +  comm - MPI communicator
2388 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2389 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2390 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2391 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2392 -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2393    to control all matrix memory allocation.
2394 
2395    Output Parameter:
2396 .  A - the matrix
2397 
2398    Notes:
2399    The dense format is fully compatible with standard Fortran 77
2400    storage by columns.
2401 
2402    Although local portions of the matrix are stored in column-major
2403    order, the matrix is partitioned across MPI ranks by row.
2404 
2405    The data input variable is intended primarily for Fortran programmers
2406    who wish to allocate their own matrix memory space.  Most users should
2407    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
2408 
2409    The user MUST specify either the local or global matrix dimensions
2410    (possibly both).
2411 
2412    Level: intermediate
2413 
2414 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2415 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  /* MATDENSE resolves to MATSEQDENSE or MATMPIDENSE depending on the size of comm */
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocations are attempted; only the one matching the resolved type is
     expected to take effect (the MPI one dispatches through PetscTryMethod, which
     is a no-op for other types) -- the Seq variant presumably behaves the same */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
2426 
2427 #if defined(PETSC_HAVE_CUDA)
2428 /*@C
2429    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
2430 
2431    Collective
2432 
2433    Input Parameters:
2434 +  comm - MPI communicator
2435 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2436 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2437 .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2438 .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2439 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2440    to control matrix memory allocation.
2441 
2442    Output Parameter:
2443 .  A - the matrix
2444 
2445    Level: intermediate
2446 
2447 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
2448 @*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* data must be NULL on every rank or non-NULL on every rank (collective check) */
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  /* MATDENSECUDA resolves to MATSEQDENSECUDA or MATMPIDENSECUDA per comm size */
  PetscCall(MatSetType(*A, MATDENSECUDA));
  /* both preallocations are attempted; only the one matching the resolved type
     is expected to take effect */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
2460 #endif
2461 
/* Duplicate an MPI dense matrix: create a new object of the same type and layout,
   then duplicate the local dense block (copying values iff cpvalues requests it). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  /* mark the duplicate as assembled/preallocated so it is immediately usable */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share A's row/column layouts instead of rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  /* duplicate the local (sequential) dense block */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}
2489 
/* Load an MPI dense matrix from a viewer; only binary and (when configured) HDF5
   viewers are supported. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2515 
2516 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2517 {
2518   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2519   Mat           a, b;
2520 
2521   PetscFunctionBegin;
2522   a = matA->A;
2523   b = matB->A;
2524   PetscCall(MatEqual(a, b, flag));
2525   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2526   PetscFunctionReturn(0);
2527 }
2528 
/* Destroy the reuse data attached to a C = A^T*B product of two MPI dense matrices. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  /* sendbuf/recvcounts were allocated together with a single PetscMalloc2 */
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}
2539 
/* Destroy the reuse data attached to a C = A*B^T product of two MPI dense matrices. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  /* each pair below was allocated with a single PetscMalloc2 */
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}
2550 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the full
   local product aseq^T * bseq, then the partial products are summed and
   distributed to C's row owners with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* sendbuf and recvcounts were sized in the symbolic phase */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: rows destined for each rank, column by column */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2595 
/* Symbolic phase of C = A^T * B for MPI dense matrices: set C's sizes/type and
   allocate the send buffer and Reduce_scatter counts reused by the numeric phase.
   The fill argument is accepted for interface conformity but not used here. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must share the same row distribution for the local products to align */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the full cM x cN product; recvcounts drives MPI_Reduce_scatter */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2632 
/* Symbolic phase of C = A * B^T for MPI dense matrices: set C's sizes/type,
   choose the communication algorithm ("allgatherv" or cyclic), and allocate the
   buffers the numeric phase reuses. The fill argument is not used here. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = allgatherv (default), alg 1 = cyclic ring exchange */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic Isend/Irecv traffic separate */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- double buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" -- buf[0] holds local B^T, buf[1] the gathered result */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2684 
/* Numeric phase of C = A * B^T using a ring exchange: each rank's block of B
   circulates around the communicator with nonblocking sends/receives while the
   local GEMM against the block currently held overlaps the communication. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  /* leading dimensions of the local blocks, narrowed to BLAS ints */
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous; send it directly without repacking */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact local B into buf[0], dropping the leading-dimension padding */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2767 
2768 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2769 {
2770   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2771   Mat_MatTransMultDense *abt;
2772   MPI_Comm               comm;
2773   PetscMPIInt            size;
2774   PetscScalar           *cv, *sendbuf, *recvbuf;
2775   const PetscScalar     *av, *bv;
2776   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2777   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2778   PetscBLASInt           cm, cn, ck, alda, clda;
2779 
2780   PetscFunctionBegin;
2781   MatCheckProduct(C, 3);
2782   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2783   abt = (Mat_MatTransMultDense *)C->product->data;
2784   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2785   PetscCallMPI(MPI_Comm_size(comm, &size));
2786   PetscCall(MatDenseGetArrayRead(a->A, &av));
2787   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2788   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2789   PetscCall(MatDenseGetLDA(a->A, &i));
2790   PetscCall(PetscBLASIntCast(i, &alda));
2791   PetscCall(MatDenseGetLDA(b->A, &blda));
2792   PetscCall(MatDenseGetLDA(c->A, &i));
2793   PetscCall(PetscBLASIntCast(i, &clda));
2794   /* copy transpose of B into buf[0] */
2795   bn      = B->rmap->n;
2796   sendbuf = abt->buf[0];
2797   recvbuf = abt->buf[1];
2798   for (k = 0, j = 0; j < bn; j++) {
2799     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2800   }
2801   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2802   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2803   PetscCall(PetscBLASIntCast(cK, &ck));
2804   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2805   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2806   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2807   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2808   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2809   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2810   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2811   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2816 {
2817   Mat_MatTransMultDense *abt;
2818 
2819   PetscFunctionBegin;
2820   MatCheckProduct(C, 3);
2821   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2822   abt = (Mat_MatTransMultDense *)C->product->data;
2823   switch (abt->alg) {
2824   case 1:
2825     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2826     break;
2827   default:
2828     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2829     break;
2830   }
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2835 {
2836   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2837 
2838   PetscFunctionBegin;
2839   PetscCall(MatDestroy(&ab->Ce));
2840   PetscCall(MatDestroy(&ab->Ae));
2841   PetscCall(MatDestroy(&ab->Be));
2842   PetscCall(PetscFree(ab));
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: refresh the Elemental copies of A and B
   held in the product data, multiply in Elemental format, then convert the result
   back into the MPIDENSE matrix C in place. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  /* reuse the Elemental matrices allocated by the symbolic phase (MAT_REUSE_MATRIX) */
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  /* MAT_REUSE_MATRIX: C keeps its existing MPIDENSE layout, only values are overwritten */
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(0);
}
2861 
2862 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2863 {
2864   Mat               Ae, Be, Ce;
2865   Mat_MatMultDense *ab;
2866 
2867   PetscFunctionBegin;
2868   MatCheckProduct(C, 4);
2869   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2870   /* check local size of A and B */
2871   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2872     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2873   }
2874 
2875   /* create elemental matrices Ae and Be */
2876   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2877   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2878   PetscCall(MatSetType(Ae, MATELEMENTAL));
2879   PetscCall(MatSetUp(Ae));
2880   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2881 
2882   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2883   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2884   PetscCall(MatSetType(Be, MATELEMENTAL));
2885   PetscCall(MatSetUp(Be));
2886   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2887 
2888   /* compute symbolic Ce = Ae*Be */
2889   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2890   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2891 
2892   /* setup C */
2893   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2894   PetscCall(MatSetType(C, MATDENSE));
2895   PetscCall(MatSetUp(C));
2896 
2897   /* create data structure for reuse Cdense */
2898   PetscCall(PetscNew(&ab));
2899   ab->Ae = Ae;
2900   ab->Be = Be;
2901   ab->Ce = Ce;
2902 
2903   C->product->data       = ab;
2904   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2905   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2906   PetscFunctionReturn(0);
2907 }
2908 #endif
2909 /* ----------------------------------------------- */
2910 #if defined(PETSC_HAVE_ELEMENTAL)
/* Configure C for the C = A*B product: install the Elemental-backed symbolic
   routine (only compiled when PETSc was built with Elemental support). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
2918 #endif
2919 
2920 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2921 {
2922   Mat_Product *product = C->product;
2923   Mat          A = product->A, B = product->B;
2924 
2925   PetscFunctionBegin;
2926   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2927     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2928   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2929   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2930   PetscFunctionReturn(0);
2931 }
2932 
/* Configure C for the C = A*B^T product: choose between the "allgatherv" and
   "cyclic" algorithms (from the product's algorithm setting and/or runtime
   options) and install the corresponding symbolic routines. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option: the option name depends on whether the product was
     requested through the legacy MatMatTransposeMult() API or MatProduct */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  /* flg is set only when the user supplied the option; record the chosen algorithm */
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2962 
/* Entry point for MatProductSetFromOptions on MPIDENSE matrices: dispatch to the
   handler for the requested product type. AB is only available when PETSc was
   built with Elemental; unsupported types fall through silently so the generic
   MatProduct machinery can report the lack of support. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
2985