xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2daea058f1fa0b4b5a893c784be52f4813ced5f1)
1 /*
2    Basic functions for basic parallel dense matrices.
3    Portions of this code are under:
4    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 #include <petsc/private/deviceimpl.h>
12 #include <petsc/private/sfimpl.h>
13 
14 /*@
15   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
16   matrix that represents the operator. For sequential matrices it returns itself.
17 
18   Input Parameter:
19 . A - the sequential or MPI `MATDENSE` matrix
20 
21   Output Parameter:
22 . B - the inner matrix
23 
24   Level: intermediate
25 
26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
27 @*/
MatDenseGetLocalMatrix(Mat A,Mat * B)28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
29 {
30   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
31   PetscBool     flg;
32 
33   PetscFunctionBegin;
34   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
35   PetscAssertPointer(B, 2);
36   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
37   if (flg) *B = mat->A;
38   else {
39     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
40     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
41     *B = A;
42   }
43   PetscFunctionReturn(PETSC_SUCCESS);
44 }
45 
MatCopy_MPIDense(Mat A,Mat B,MatStructure s)46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
47 {
48   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
49   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
50 
51   PetscFunctionBegin;
52   PetscCall(MatCopy(Amat->A, Bmat->A, s));
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
/* Add alpha to every existing diagonal entry of the parallel dense matrix. */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* A tall rectangular matrix may own rows past the last column; only rows
     with index < global column count have a diagonal entry */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* global row j is local row j - rstart; its diagonal sits in (global)
       column j of the column-major local array, hence offset j*lda */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
73 
MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
75 {
76   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
77   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
78 
79   PetscFunctionBegin;
80   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
81   lrow = row - rstart;
82   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
87 {
88   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
89   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
90 
91   PetscFunctionBegin;
92   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
93   lrow = row - rstart;
94   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
/* Return (and cache on A) the square diagonal block: this process's rows and
   the matching global columns.  The returned matrix aliases the storage of
   the inner sequential matrix; no values are copied. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  /* Reuse the block composed on a previous call, if any */
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* Point B at column rstart of the local column-major array.
       NOTE(review): the offset m*rstart assumes the inner matrix's lda == m
       (no row padding); with a user-set larger LDA this looks wrong — confirm */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* The composition on A keeps B alive; drop the local reference */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
132 
/* Insert/add a logically dense block of values.  Locally owned rows go
   straight into the inner sequential matrix; off-process rows are stashed
   (unless donotstash) and communicated during assembly. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input block starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* v is column-major: entry (i,j) of the block is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* Off-process entry: queue it for assembly-time communication */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
165 
/* Gather values at the given global indices into v (row-major m x n block).
   Only rows owned by this process are supported. */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
      /* Output entry (i,j) of the row-major result block */
      PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
185 
MatDenseGetLDA_MPIDense(Mat A,PetscInt * lda)186 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
187 {
188   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
189 
190   PetscFunctionBegin;
191   PetscCall(MatDenseGetLDA(a->A, lda));
192   PetscFunctionReturn(PETSC_SUCCESS);
193 }
194 
/* Set the leading dimension of the local dense storage.  If the inner
   sequential matrix does not exist yet, create it here (host or device
   flavor matching A) so the LDA can be recorded before preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* The inner matrix stores this process's rows and ALL global columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}
221 
MatDenseGetArray_MPIDense(Mat A,PetscScalar ** array)222 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
223 {
224   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
225 
226   PetscFunctionBegin;
227   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
228   PetscCall(MatDenseGetArray(a->A, array));
229   PetscFunctionReturn(PETSC_SUCCESS);
230 }
231 
MatDenseGetArrayRead_MPIDense(Mat A,PetscScalar ** array)232 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
233 {
234   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
235 
236   PetscFunctionBegin;
237   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
238   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
239   PetscFunctionReturn(PETSC_SUCCESS);
240 }
241 
MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar ** array)242 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
243 {
244   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
245 
246   PetscFunctionBegin;
247   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
248   PetscCall(MatDenseGetArrayWrite(a->A, array));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
MatDensePlaceArray_MPIDense(Mat A,const PetscScalar * array)252 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
253 {
254   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
255 
256   PetscFunctionBegin;
257   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
258   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
259   PetscCall(MatDensePlaceArray(a->A, array));
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
MatDenseResetArray_MPIDense(Mat A)263 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
264 {
265   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
266 
267   PetscFunctionBegin;
268   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
269   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
270   PetscCall(MatDenseResetArray(a->A));
271   PetscFunctionReturn(PETSC_SUCCESS);
272 }
273 
MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar * array)274 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
275 {
276   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
277 
278   PetscFunctionBegin;
279   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
280   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
281   PetscCall(MatDenseReplaceArray(a->A, array));
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
/* Extract the submatrix A[isrow, iscol].  The column index set is allgathered
   so every process sees all selected columns; the row distribution of the
   result follows the local sizes of isrow (no parallel redistribution). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* Every process needs the full list of selected columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at source column icol[i] of the column-major local array;
       bv advances contiguously through the (freshly preallocated) target */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
351 
MatDenseRestoreArray_MPIDense(Mat A,PetscScalar ** array)352 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
353 {
354   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
355 
356   PetscFunctionBegin;
357   PetscCall(MatDenseRestoreArray(a->A, array));
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
MatDenseRestoreArrayRead_MPIDense(Mat A,PetscScalar ** array)361 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
362 {
363   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
364 
365   PetscFunctionBegin;
366   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
367   PetscFunctionReturn(PETSC_SUCCESS);
368 }
369 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar ** array)370 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
371 {
372   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
373 
374   PetscFunctionBegin;
375   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
376   PetscFunctionReturn(PETSC_SUCCESS);
377 }
378 
/* Begin parallel assembly: start shipping stashed off-process entries to
   their owning processes.  A no-op when stashing is disabled or the caller
   promised there are no off-process entries. */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
392 
/* Finish parallel assembly: drain incoming stash messages, inserting each
   run of same-row values with one MatSetValues call, then assemble the
   inner sequential matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}
426 
MatZeroEntries_MPIDense(Mat A)427 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
428 {
429   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
430 
431   PetscFunctionBegin;
432   PetscCall(MatZeroEntries(l->A));
433   PetscFunctionReturn(PETSC_SUCCESS);
434 }
435 
/* Zero the given global rows, placing diag on their diagonal, and optionally
   fix the right-hand side so that b = diag*x in the zeroed rows. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows with a zero diagonal; the diagonal is inserted below */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with a constant vector inserts diag on
       EVERY local diagonal entry, not only on the zeroed rows — confirm this
       is the intended contract */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
467 
468 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
471 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
472 
/* y = A[:, c_start:c_end) * x: gather the distributed x into the local work
   vector lvec with a memtype-aware SF broadcast, then dispatch to the inner
   matrix's column-range multiply. */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
491 
/* y = A*x: gather the distributed x into the local work vector lvec with a
   memtype-aware SF broadcast, then apply the inner sequential product. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
510 
/* z = y + A[:, c_start:c_end) * x: gather x into the local work vector via
   the SF, then dispatch to the inner matrix's column-range multiply-add. */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}
529 
/* z = y + A*x: gather x into the local work vector via the SF, then apply
   the inner sequential multiply-add. */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
548 
MatMultHermitianTransposeColumnRange_MPIDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end)549 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
550 {
551   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
552   const PetscScalar *ax;
553   PetscScalar       *ay;
554   PetscMemType       axmtype, aymtype;
555   PetscInt           r_start, r_end;
556   PetscInt           c_start_local, c_end_local;
557 
558   PetscFunctionBegin;
559   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
560   PetscCall(VecZeroEntries(a->lvec));
561   PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
562   c_start_local = PetscMax(c_start, r_start);
563   c_end_local   = PetscMin(c_end, r_end);
564   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
565   if (c_end_local > c_start_local) {
566     if (PetscMemTypeHost(aymtype)) {
567       PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
568     } else {
569       PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
570       PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
571     }
572   }
573   PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
574   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
575   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
576   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
577   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
578   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
579   PetscFunctionReturn(PETSC_SUCCESS);
580 }
581 
/* Shared kernel for y = A^T x (herm == PETSC_FALSE) or y = A^H x (herm ==
   PETSC_TRUE): apply the local (Hermitian) transpose product into lvec,
   then SF-reduce (sum) the per-process contributions into y. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* y must start at zero because the reduction below adds into it */
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
602 
/* z = y + A^H * x restricted to columns [c_start, c_end): seed z with y,
   compute the local column-range product into lvec (zeroed first), then
   SF-reduce (sum) the contributions into z. */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
623 
/* Shared kernel for z = y + A^T x (herm == PETSC_FALSE) or z = y + A^H x
   (herm == PETSC_TRUE): seed z with y, apply the local (Hermitian)
   transpose product into lvec, then SF-reduce (sum) contributions into z. */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* Lazily build the communication pattern on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}
644 
/* y = A^T x (non-Hermitian transpose) */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
651 
/* z = y + A^T x (non-Hermitian transpose) */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
658 
/* y = A^H x (conjugate transpose) */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
665 
/* z = y + A^H x (conjugate transpose) */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
672 
/* Extract the diagonal of A into v; local entries past the square part of
   the matrix are set to zero. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  /* number of diagonal entries stored on this process */
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* column offset of this process's first diagonal entry in the column-major
     local array; NOTE(review): computed with m rather than lda — only
     equivalent when lda == m (no row padding); confirm */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  /* zero any remaining local entries (rows beyond the last column) */
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
695 
/* Destroys a MATMPIDENSE matrix: frees the local block and the communication
   structures, then removes every composed method and composed object so the
   base Mat is left clean for a possible type change. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* outstanding column-vector / submatrix accesses must be restored before destruction */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the local (sequential) block and the cached work objects */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove every composed method implementation registered for this type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA variants registered by the mpidensecuda type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP variants registered by the mpidensehip type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* drop the composed diagonal-block reference as well */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
794 
795 #include <petscdraw.h>
/* Viewer backend for ASCII, draw and socket viewers.  The ASCII INFO formats
   print only per-rank statistics and return early; every other case gathers the
   whole matrix onto rank 0 and views it there as a sequential dense matrix.
   Collective: all ranks must participate in the gather and sub-viewer calls. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local statistics only; no entries are printed */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing to print beyond the generic header the caller already emitted */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    /* rank 0 owns everything; all other ranks contribute zero local rows/columns */
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    /* each rank pushes its owned rows; the stash routes them to rank 0 at assembly */
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* label the gathered block with the original matrix name so output stays consistent */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
876 
MatView_MPIDense(Mat mat,PetscViewer viewer)877 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
878 {
879   PetscBool isascii, isbinary, isdraw, issocket;
880 
881   PetscFunctionBegin;
882   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
883   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
884   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
885   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
886 
887   if (isascii || issocket || isdraw) PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
888   else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
889   PetscFunctionReturn(PETSC_SUCCESS);
890 }
891 
MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo * info)892 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
893 {
894   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
895   Mat            mdn = mat->A;
896   PetscLogDouble isend[5], irecv[5];
897 
898   PetscFunctionBegin;
899   info->block_size = 1.0;
900 
901   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
902 
903   isend[0] = info->nz_used;
904   isend[1] = info->nz_allocated;
905   isend[2] = info->nz_unneeded;
906   isend[3] = info->memory;
907   isend[4] = info->mallocs;
908   if (flag == MAT_LOCAL) {
909     info->nz_used      = isend[0];
910     info->nz_allocated = isend[1];
911     info->nz_unneeded  = isend[2];
912     info->memory       = isend[3];
913     info->mallocs      = isend[4];
914   } else if (flag == MAT_GLOBAL_MAX) {
915     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
916 
917     info->nz_used      = irecv[0];
918     info->nz_allocated = irecv[1];
919     info->nz_unneeded  = irecv[2];
920     info->memory       = irecv[3];
921     info->mallocs      = irecv[4];
922   } else if (flag == MAT_GLOBAL_SUM) {
923     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
924 
925     info->nz_used      = irecv[0];
926     info->nz_allocated = irecv[1];
927     info->nz_unneeded  = irecv[2];
928     info->memory       = irecv[3];
929     info->mallocs      = irecv[4];
930   }
931   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
932   info->fill_ratio_needed = 0;
933   info->factor_mallocs    = 0;
934   PetscFunctionReturn(PETSC_SUCCESS);
935 }
936 
MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)937 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
938 {
939   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
940 
941   PetscFunctionBegin;
942   switch (op) {
943   case MAT_NEW_NONZERO_LOCATIONS:
944   case MAT_NEW_NONZERO_LOCATION_ERR:
945   case MAT_NEW_NONZERO_ALLOCATION_ERR:
946     MatCheckPreallocated(A, 1);
947     PetscCall(MatSetOption(a->A, op, flg));
948     break;
949   case MAT_ROW_ORIENTED:
950     MatCheckPreallocated(A, 1);
951     a->roworiented = flg;
952     PetscCall(MatSetOption(a->A, op, flg));
953     break;
954   case MAT_IGNORE_OFF_PROC_ENTRIES:
955     a->donotstash = flg;
956     break;
957   case MAT_SYMMETRIC:
958   case MAT_STRUCTURALLY_SYMMETRIC:
959   case MAT_HERMITIAN:
960   case MAT_SYMMETRY_ETERNAL:
961   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
962   case MAT_SPD:
963   case MAT_SPD_ETERNAL:
964     /* if the diagonal matrix is square it inherits some of the properties above */
965     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
966     break;
967   default:
968     break;
969   }
970   PetscFunctionReturn(PETSC_SUCCESS);
971 }
972 
MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)973 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
974 {
975   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
976   const PetscScalar *l;
977   PetscScalar        x, *v, *vv, *r;
978   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
979 
980   PetscFunctionBegin;
981   PetscCall(MatDenseGetArray(mdn->A, &vv));
982   PetscCall(MatDenseGetLDA(mdn->A, &lda));
983   PetscCall(MatGetLocalSize(A, &s2, &s3));
984   if (ll) {
985     PetscCall(VecGetLocalSize(ll, &s2a));
986     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
987     PetscCall(VecGetArrayRead(ll, &l));
988     for (i = 0; i < m; i++) {
989       x = l[i];
990       v = vv + i;
991       for (j = 0; j < n; j++) {
992         (*v) *= x;
993         v += lda;
994       }
995     }
996     PetscCall(VecRestoreArrayRead(ll, &l));
997     PetscCall(PetscLogFlops(1.0 * n * m));
998   }
999   if (rr) {
1000     const PetscScalar *ar;
1001 
1002     PetscCall(VecGetLocalSize(rr, &s3a));
1003     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
1004     PetscCall(VecGetArrayRead(rr, &ar));
1005     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
1006     PetscCall(VecGetArray(mdn->lvec, &r));
1007     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
1008     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
1009     PetscCall(VecRestoreArrayRead(rr, &ar));
1010     for (i = 0; i < n; i++) {
1011       x = r[i];
1012       v = vv + i * lda;
1013       for (j = 0; j < m; j++) (*v++) *= x;
1014     }
1015     PetscCall(VecRestoreArray(mdn->lvec, &r));
1016     PetscCall(PetscLogFlops(1.0 * n * m));
1017   }
1018   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
1019   PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021 
MatNorm_MPIDense(Mat A,NormType type,PetscReal * nrm)1022 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
1023 {
1024   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
1025   PetscInt           i, j;
1026   PetscMPIInt        size;
1027   PetscReal          sum = 0.0;
1028   const PetscScalar *av, *v;
1029 
1030   PetscFunctionBegin;
1031   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1032   v = av;
1033   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1034   if (size == 1) {
1035     PetscCall(MatNorm(mdn->A, type, nrm));
1036   } else {
1037     if (type == NORM_FROBENIUS) {
1038       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1039         sum += PetscRealPart(PetscConj(*v) * (*v));
1040         v++;
1041       }
1042       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1043       *nrm = PetscSqrtReal(*nrm);
1044       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1045     } else if (type == NORM_1) {
1046       PetscReal *tmp;
1047 
1048       PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1049       *nrm = 0.0;
1050       v    = av;
1051       for (j = 0; j < mdn->A->cmap->n; j++) {
1052         for (i = 0; i < mdn->A->rmap->n; i++) {
1053           tmp[j] += PetscAbsScalar(*v);
1054           v++;
1055         }
1056       }
1057       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1058       for (j = 0; j < A->cmap->N; j++) {
1059         if (tmp[j] > *nrm) *nrm = tmp[j];
1060       }
1061       PetscCall(PetscFree(tmp));
1062       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1063     } else if (type == NORM_INFINITY) { /* max row norm */
1064       PetscCall(MatNorm(mdn->A, type, nrm));
1065       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1066     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1067   }
1068   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1069   PetscFunctionReturn(PETSC_SUCCESS);
1070 }
1071 
/* Forms the explicit transpose B = A^T.  For MAT_INITIAL_MATRIX and
   MAT_INPLACE_MATRIX a new matrix with swapped row/column layouts is created;
   for MAT_REUSE_MATRIX the caller-supplied *matout is refilled.  Entries are
   redistributed with MatSetValues(): each locally stored column j becomes row j
   of B, with the owned global rows rstart..rstart+m-1 as its column indices.
   MAT_INPLACE_MATRIX finally replaces A's contents via MatHeaderMerge(). */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* B gets A's layouts with rows and columns exchanged */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the owned global row indices, which become column indices of B */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    /* column j of the column-major local block is inserted as row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: swap B's guts into A and destroy the shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1110 
1111 static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1112 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1113 
/* Finalizes the row/column layouts and, if the user never preallocated,
   allocates the local storage with default (NULL, i.e. PETSc-owned) data. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1122 
/* Y = alpha*X + Y, delegated to the local dense blocks.  Assumes X and Y have
   conforming layouts — enforced by the public MatAXPY() interface before
   dispatching here (TODO confirm against caller). */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1131 
/* Replaces every entry by its complex conjugate, delegated to the local block. */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1140 
/* Replaces every entry by its real part, delegated to the local block. */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1149 
/* Replaces every entry by its imaginary part, delegated to the local block. */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1158 
/* Extracts (global) column col of A into v by dispatching to the local block's
   getcolumnvector implementation; errors out if the local block is missing or
   does not provide the operation. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1169 
1170 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1171 
MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal * reductions)1172 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1173 {
1174   PetscInt      i, m, n;
1175   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1176 
1177   PetscFunctionBegin;
1178   PetscCall(MatGetSize(A, &m, &n));
1179   if (type == REDUCTION_MEAN_REALPART) {
1180     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
1181   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1182     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
1183   } else {
1184     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
1185   }
1186   if (type == NORM_2) {
1187     for (i = 0; i < n; i++) reductions[i] *= reductions[i];
1188   }
1189   if (type == NORM_INFINITY) {
1190     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1191   } else {
1192     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1193   }
1194   if (type == NORM_2) {
1195     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1196   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1197     for (i = 0; i < n; i++) reductions[i] /= m;
1198   }
1199   PetscFunctionReturn(PETSC_SUCCESS);
1200 }
1201 
/* Fills the matrix with random values via the local block; with device support
   the offload mask is mirrored so A's host/device state matches its block's. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1213 
1214 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1215 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1216 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1217 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1218 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1219 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1220 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1221 
1222 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1223                                        MatGetRow_MPIDense,
1224                                        MatRestoreRow_MPIDense,
1225                                        MatMult_MPIDense,
1226                                        /*  4*/ MatMultAdd_MPIDense,
1227                                        MatMultTranspose_MPIDense,
1228                                        MatMultTransposeAdd_MPIDense,
1229                                        NULL,
1230                                        NULL,
1231                                        NULL,
1232                                        /* 10*/ NULL,
1233                                        NULL,
1234                                        NULL,
1235                                        NULL,
1236                                        MatTranspose_MPIDense,
1237                                        /* 15*/ MatGetInfo_MPIDense,
1238                                        MatEqual_MPIDense,
1239                                        MatGetDiagonal_MPIDense,
1240                                        MatDiagonalScale_MPIDense,
1241                                        MatNorm_MPIDense,
1242                                        /* 20*/ MatAssemblyBegin_MPIDense,
1243                                        MatAssemblyEnd_MPIDense,
1244                                        MatSetOption_MPIDense,
1245                                        MatZeroEntries_MPIDense,
1246                                        /* 24*/ MatZeroRows_MPIDense,
1247                                        NULL,
1248                                        NULL,
1249                                        NULL,
1250                                        NULL,
1251                                        /* 29*/ MatSetUp_MPIDense,
1252                                        NULL,
1253                                        NULL,
1254                                        MatGetDiagonalBlock_MPIDense,
1255                                        NULL,
1256                                        /* 34*/ MatDuplicate_MPIDense,
1257                                        NULL,
1258                                        NULL,
1259                                        NULL,
1260                                        NULL,
1261                                        /* 39*/ MatAXPY_MPIDense,
1262                                        MatCreateSubMatrices_MPIDense,
1263                                        NULL,
1264                                        MatGetValues_MPIDense,
1265                                        MatCopy_MPIDense,
1266                                        /* 44*/ NULL,
1267                                        MatScale_MPIDense,
1268                                        MatShift_MPIDense,
1269                                        NULL,
1270                                        NULL,
1271                                        /* 49*/ MatSetRandom_MPIDense,
1272                                        NULL,
1273                                        NULL,
1274                                        NULL,
1275                                        NULL,
1276                                        /* 54*/ NULL,
1277                                        NULL,
1278                                        NULL,
1279                                        NULL,
1280                                        NULL,
1281                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1282                                        MatDestroy_MPIDense,
1283                                        MatView_MPIDense,
1284                                        NULL,
1285                                        NULL,
1286                                        /* 64*/ NULL,
1287                                        NULL,
1288                                        NULL,
1289                                        NULL,
1290                                        NULL,
1291                                        /* 69*/ NULL,
1292                                        NULL,
1293                                        NULL,
1294                                        NULL,
1295                                        NULL,
1296                                        /* 74*/ NULL,
1297                                        NULL,
1298                                        NULL,
1299                                        NULL,
1300                                        MatLoad_MPIDense,
1301                                        /* 79*/ NULL,
1302                                        NULL,
1303                                        NULL,
1304                                        NULL,
1305                                        /* 83*/ NULL,
1306                                        NULL,
1307                                        NULL,
1308                                        NULL,
1309                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1310                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1311                                        /* 89*/ NULL,
1312                                        MatProductSetFromOptions_MPIDense,
1313                                        NULL,
1314                                        NULL,
1315                                        MatConjugate_MPIDense,
1316                                        /* 94*/ NULL,
1317                                        NULL,
1318                                        MatRealPart_MPIDense,
1319                                        MatImaginaryPart_MPIDense,
1320                                        NULL,
1321                                        /*99*/ NULL,
1322                                        NULL,
1323                                        NULL,
1324                                        NULL,
1325                                        MatGetColumnVector_MPIDense,
1326                                        /*104*/ NULL,
1327                                        NULL,
1328                                        NULL,
1329                                        NULL,
1330                                        NULL,
1331                                        /*109*/ NULL,
1332                                        NULL,
1333                                        MatMultHermitianTranspose_MPIDense,
1334                                        MatMultHermitianTransposeAdd_MPIDense,
1335                                        NULL,
1336                                        /*114*/ NULL,
1337                                        MatGetColumnReductions_MPIDense,
1338                                        NULL,
1339                                        NULL,
1340                                        NULL,
1341                                        /*120*/ NULL,
1342                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1343                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1344                                        NULL,
1345                                        /*124*/ NULL,
1346                                        NULL,
1347                                        NULL,
1348                                        NULL,
1349                                        NULL,
1350                                        /*129*/ NULL,
1351                                        NULL,
1352                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1353                                        NULL,
1354                                        NULL,
1355                                        /*134*/ NULL,
1356                                        NULL,
1357                                        NULL,
1358                                        NULL,
1359                                        NULL,
1360                                        /*139*/ NULL,
1361                                        NULL,
1362                                        NULL,
1363                                        NULL,
1364                                        NULL,
1365                                        NULL};
1366 
MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar * data)1367 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1368 {
1369   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1370   MatType       mtype = MATSEQDENSE;
1371 
1372   PetscFunctionBegin;
1373   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1374   PetscCall(PetscLayoutSetUp(mat->rmap));
1375   PetscCall(PetscLayoutSetUp(mat->cmap));
1376   if (!a->A) {
1377     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1378     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1379   }
1380 #if defined(PETSC_HAVE_CUDA)
1381   PetscBool iscuda;
1382   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1383   if (iscuda) mtype = MATSEQDENSECUDA;
1384 #endif
1385 #if defined(PETSC_HAVE_HIP)
1386   PetscBool iship;
1387   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1388   if (iship) mtype = MATSEQDENSEHIP;
1389 #endif
1390   PetscCall(MatSetType(a->A, mtype));
1391   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1392 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1393   mat->offloadmask = a->A->offloadmask;
1394 #endif
1395   mat->preallocated = PETSC_TRUE;
1396   mat->assembled    = PETSC_TRUE;
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
MatConvert_MPIAIJ_MPIDense(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1400 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1401 {
1402   Mat B, C;
1403 
1404   PetscFunctionBegin;
1405   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1406   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1407   PetscCall(MatDestroy(&C));
1408   if (reuse == MAT_REUSE_MATRIX) {
1409     C = *newmat;
1410   } else C = NULL;
1411   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1412   PetscCall(MatDestroy(&B));
1413   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
1414   else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1415   PetscFunctionReturn(PETSC_SUCCESS);
1416 }
1417 
MatConvert_MPIDense_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1418 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1419 {
1420   Mat B, C;
1421 
1422   PetscFunctionBegin;
1423   PetscCall(MatDenseGetLocalMatrix(A, &C));
1424   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1425   if (reuse == MAT_REUSE_MATRIX) {
1426     C = *newmat;
1427   } else C = NULL;
1428   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1429   PetscCall(MatDestroy(&B));
1430   if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
1431   else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1432   PetscFunctionReturn(PETSC_SUCCESS);
1433 }
1434 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL by inserting every locally owned
   entry into a freshly created (or reused, zeroed) Elemental matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the caller's matrix; clear it since entries are added below */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the dense storage below is column-major, so insert column-oriented */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  /* index arrays: all global columns, and this rank's contiguous row range */
  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* fast path: storage is contiguous when the leading dimension equals the local row count */
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* otherwise insert one column at a time, stepping by the leading dimension */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's guts with the Elemental matrix, destroying the old ones */
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
1479 
MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar ** vals)1480 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1481 {
1482   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1483 
1484   PetscFunctionBegin;
1485   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1486   PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488 
MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar ** vals)1489 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1490 {
1491   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1492 
1493   PetscFunctionBegin;
1494   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1495   PetscFunctionReturn(PETSC_SUCCESS);
1496 }
1497 
MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)1498 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1499 {
1500   Mat_MPIDense *mat;
1501   PetscInt      m, nloc, N;
1502 
1503   PetscFunctionBegin;
1504   PetscCall(MatGetSize(inmat, &m, &N));
1505   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1506   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1507     PetscInt sum;
1508 
1509     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1510     /* Check sum(n) = N */
1511     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1512     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1513 
1514     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1515     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1516   }
1517 
1518   /* numeric phase */
1519   mat = (Mat_MPIDense *)(*outmat)->data;
1520   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1521   PetscFunctionReturn(PETSC_SUCCESS);
1522 }
1523 
MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1524 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1525 {
1526   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1527   PetscInt      lda;
1528 
1529   PetscFunctionBegin;
1530   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1531   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1532   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1533   a->vecinuse = col + 1;
1534   PetscCall(MatDenseGetLDA(a->A, &lda));
1535   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1536   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1537   *v = a->cvec;
1538   PetscFunctionReturn(PETSC_SUCCESS);
1539 }
1540 
MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1541 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1542 {
1543   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1544 
1545   PetscFunctionBegin;
1546   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1547   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1548   VecCheckAssembled(a->cvec);
1549   a->vecinuse = 0;
1550   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1551   PetscCall(VecResetArray(a->cvec));
1552   if (v) *v = NULL;
1553   PetscFunctionReturn(PETSC_SUCCESS);
1554 }
1555 
MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec * v)1556 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1557 {
1558   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1559   PetscInt      lda;
1560 
1561   PetscFunctionBegin;
1562   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1563   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1564   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1565   a->vecinuse = col + 1;
1566   PetscCall(MatDenseGetLDA(a->A, &lda));
1567   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1568   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1569   PetscCall(VecLockReadPush(a->cvec));
1570   *v = a->cvec;
1571   PetscFunctionReturn(PETSC_SUCCESS);
1572 }
1573 
MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec * v)1574 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1575 {
1576   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1577 
1578   PetscFunctionBegin;
1579   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1580   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1581   VecCheckAssembled(a->cvec);
1582   a->vecinuse = 0;
1583   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1584   PetscCall(VecLockReadPop(a->cvec));
1585   PetscCall(VecResetArray(a->cvec));
1586   if (v) *v = NULL;
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec * v)1590 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1591 {
1592   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1593   PetscInt      lda;
1594 
1595   PetscFunctionBegin;
1596   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1597   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1598   if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1599   a->vecinuse = col + 1;
1600   PetscCall(MatDenseGetLDA(a->A, &lda));
1601   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1602   PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1603   *v = a->cvec;
1604   PetscFunctionReturn(PETSC_SUCCESS);
1605 }
1606 
MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec * v)1607 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1608 {
1609   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1610 
1611   PetscFunctionBegin;
1612   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1613   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1614   VecCheckAssembled(a->cvec);
1615   a->vecinuse = 0;
1616   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1617   PetscCall(VecResetArray(a->cvec));
1618   if (v) *v = NULL;
1619   PetscFunctionReturn(PETSC_SUCCESS);
1620 }
1621 
/* Return (through *v) a cached matrix view of rows [rbegin,rend) and columns
   [cbegin,cend) of A, backed by the same storage. The view matrix a->cmat is
   created once and its layouts are rebuilt only when the requested ranges
   change from the previous call. Must be matched by
   MatDenseRestoreSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global ranges against this rank's ownership, giving
     local (process) ranges; empty intersection yields begin == end */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    /* first use: create the cached view matrix with layouts for this request */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    /* remember the ranges this cached view was built for */
    c             = (Mat_MPIDense *)a->cmat->data;
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* row range changed since the last call: rebuild the cached row layout */
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  /* column range changed: rebuild the column layout and drop the now-stale
     scatter objects so they are regenerated on next use */
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* local rows are clipped, but columns use the global range since the inner
     sequential matrix stores all global columns */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* one-based so zero means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1694 
MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat * v)1695 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1696 {
1697   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1698   Mat_MPIDense *c;
1699 
1700   PetscFunctionBegin;
1701   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1702   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1703   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1704   a->matinuse = 0;
1705   c           = (Mat_MPIDense *)a->cmat->data;
1706   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1707   *v = NULL;
1708 #if defined(PETSC_HAVE_DEVICE)
1709   A->offloadmask = a->A->offloadmask;
1710 #endif
1711   PetscFunctionReturn(PETSC_SUCCESS);
1712 }
1713 
1714 /*MC
1715    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1716 
1717    Options Database Key:
1718 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1719 
1720   Level: beginner
1721 
1722 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1723 M*/
/* Constructor for MATMPIDENSE: allocates the type-specific data, installs the
   operation table, and registers all composed-method implementations. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array and column-vector/submatrix access methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type-conversion methods (availability depends on configured packages) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  /* preallocation and matrix-product dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  /* column access and column-range multiply methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1796 
1797 /*MC
1798    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1799 
1800    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1801    and `MATMPIDENSE` otherwise.
1802 
1803    Options Database Key:
1804 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1805 
1806   Level: beginner
1807 
1808 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1809 M*/
1810 
1811 /*@
1812   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1813 
1814   Collective
1815 
1816   Input Parameters:
1817 + B    - the matrix
1818 - data - optional location of matrix data.  Set to `NULL` for PETSc
1819          to control all matrix memory allocation.
1820 
1821   Level: intermediate
1822 
1823   Notes:
1824   The dense format is fully compatible with standard Fortran
1825   storage by columns.
1826 
1827   The data input variable is intended primarily for Fortran programmers
1828   who wish to allocate their own matrix memory space.  Most users should
1829   set `data` to `NULL`.
1830 
1831 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1832 @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the type-specific implementation; a no-op if the type does not provide one */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1840 
1841 /*@
1842   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1843   array provided by the user. This is useful to avoid copying an array
1844   into a matrix
1845 
1846   Not Collective
1847 
1848   Input Parameters:
1849 + mat   - the matrix
1850 - array - the array in column major order
1851 
1852   Level: developer
1853 
1854   Note:
1855   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1856 
1857   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1858   freed when the matrix is destroyed.
1859 
1860 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1861           `MatDenseReplaceArray()`
1862 @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* errors if the matrix type does not support placing an array */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-provided array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1874 
1875 /*@
1876   MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to `MatDensePlaceArray()`
1877 
1878   Not Collective
1879 
1880   Input Parameter:
1881 . mat - the matrix
1882 
1883   Level: developer
1884 
1885   Note:
1886   You can only call this after a call to `MatDensePlaceArray()`
1887 
1888 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1889 @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* errors if the matrix type does not support resetting the array */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1898 
1899 /*@
1900   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1901   array provided by the user. This is useful to avoid copying an array
1902   into a matrix
1903 
1904   Not Collective
1905 
1906   Input Parameters:
1907 + mat   - the matrix
1908 - array - the array in column major order
1909 
1910   Level: developer
1911 
1912   Note:
1913   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1914 
1915   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1916   freed by the user. It will be freed when the matrix is destroyed.
1917 
1918 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1919 @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* errors if the matrix type does not support replacing the array */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
1931 
1932 /*@
1933   MatCreateDense - Creates a matrix in `MATDENSE` format.
1934 
1935   Collective
1936 
1937   Input Parameters:
1938 + comm - MPI communicator
1939 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1940 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1941 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1942 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1943 - data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
1944    to control all matrix memory allocation.
1945 
1946   Output Parameter:
1947 . A - the matrix
1948 
1949   Level: intermediate
1950 
1951   Notes:
1952   The dense format is fully compatible with standard Fortran
1953   storage by columns.
1954 
1955   Although local portions of the matrix are stored in column-major
1956   order, the matrix is partitioned across MPI ranks by row.
1957 
1958   The data input variable is intended primarily for Fortran programmers
1959   who wish to allocate their own matrix memory space.  Most users should
1960   set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).
1961 
1962   The user MUST specify either the local or global matrix dimensions
1963   (possibly both).
1964 
1965 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1966 @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  /* MATDENSE resolves to MATSEQDENSE on one rank and MATMPIDENSE otherwise */
  PetscCall(MatSetType(*A, MATDENSE));
  /* only one of these takes effect: each preallocation routine is a no-op
     when *A is not of the corresponding (sequential or MPI) type */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1977 
MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat * newmat)1978 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1979 {
1980   Mat           mat;
1981   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1982 
1983   PetscFunctionBegin;
1984   *newmat = NULL;
1985   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1986   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1987   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1988   a = (Mat_MPIDense *)mat->data;
1989 
1990   mat->factortype   = A->factortype;
1991   mat->assembled    = PETSC_TRUE;
1992   mat->preallocated = PETSC_TRUE;
1993 
1994   mat->insertmode = NOT_SET_VALUES;
1995   a->donotstash   = oldmat->donotstash;
1996 
1997   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1998   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1999 
2000   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2001 
2002   *newmat = mat;
2003   PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005 
/* Load an MPI dense matrix from a viewer; only binary and (optionally) HDF5 viewers are supported. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}
2031 
MatEqual_MPIDense(Mat A,Mat B,PetscBool * flag)2032 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2033 {
2034   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2035   Mat           a, b;
2036 
2037   PetscFunctionBegin;
2038   a = matA->A;
2039   b = matB->A;
2040   PetscCall(MatEqual(a, b, flag));
2041   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2042   PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044 
MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense(PetscCtxRt data)2045 static PetscErrorCode MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense(PetscCtxRt data)
2046 {
2047   MatProductCtx_TransMatMultDense *atb = *(MatProductCtx_TransMatMultDense **)data;
2048 
2049   PetscFunctionBegin;
2050   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2051   PetscCall(MatDestroy(&atb->atb));
2052   PetscCall(PetscFree(atb));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense(PetscCtxRt data)2056 static PetscErrorCode MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense(PetscCtxRt data)
2057 {
2058   MatProductCtx_MatTransMultDense *abt = *(MatProductCtx_MatTransMultDense **)data;
2059 
2060   PetscFunctionBegin;
2061   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2062   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2063   PetscCall(PetscFree(abt));
2064   PetscFunctionReturn(PETSC_SUCCESS);
2065 }
2066 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank computes its local contribution atb = Aloc^T * Bloc (a full
   cM x cN matrix), then the contributions are summed and scattered so each
   rank ends up with its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense                    *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  MatProductCtx_TransMatMultDense *atb;
  MPI_Comm                         comm;
  PetscMPIInt                      size, *recvcounts;
  PetscScalar                     *carray, *sendbuf;
  const PetscScalar               *atbarray;
  PetscInt                         i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt                  *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  /* buffers were sized in the symbolic phase */
  atb        = (MatProductCtx_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL;
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL;
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    if (rank == 0) {
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      PetscCall(MatZeroEntries(c_local));
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols = B->cmap->N:
     * reduce through a->Mvctx so the best available reduction method is used */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    /* with more than one column, reduce whole rows at a time via a contiguous datatype */
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      PetscMPIInt mpi_N;

      PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
      PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    /* if a temporary with packed lda was used, copy the result back into C's local block */
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* general case: arrange atbarray into sendbuf, grouped by destination rank
     (each rank's slice holds the rows of C it owns, column-major) */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all contributions into the local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2171 
/* Symbolic phase of C = A^T * B for MPI dense matrices: sizes C, sets its type,
   and allocates the send buffer / receive counts reused by every numeric call. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm                         comm;
  PetscMPIInt                      size;
  PetscInt                         cm = A->cmap->n, cM, cN = B->cmap->N;
  MatProductCtx_TransMatMultDense *atb;
  PetscBool                        cisdense = PETSC_FALSE;
  const PetscInt                  *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must share the same row distribution for Aloc^T * Bloc to make sense */
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart,
             A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  /* NOTE(review): if both CUDA and HIP are enabled, the second comparison overwrites
     cisdense from the first — confirm this is intended for mixed builds */
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the full cM x cN local contribution; recvcounts drives MPI_Reduce_scatter */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
  C->product->data    = atb;
  C->product->destroy = MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2210 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sizes C and allocates
   the communication buffers for the selected algorithm ("cyclic" ring exchange
   or "allgatherv" of the whole B^T). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm                         comm;
  PetscMPIInt                      i, size;
  PetscInt                         maxRows, bufsiz;
  PetscMPIInt                      tag;
  PetscInt                         alg;
  MatProductCtx_MatTransMultDense *abt;
  Mat_Product                     *product = C->product;
  PetscBool                        flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A * B^T requires matching local column counts */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* 0 = allgatherv, 1 = cyclic */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic sends/receives from clashing with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2262 
/* Numeric phase of C = A * B^T using a ring ("cyclic") exchange: each rank's
   packed B^T block travels around the ring; at every step each rank multiplies
   its local A against the block currently held and writes the corresponding
   column slice of its local C. Communication overlaps with the local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense                    *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  MatProductCtx_MatTransMultDense *abt;
  MPI_Comm                         comm;
  PetscMPIInt                      rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar                     *sendbuf, *recvbuf = NULL, *cv;
  PetscInt                         i, cK             = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar                      _DOne = 1.0, _DZero = 0.0;
  const PetscScalar               *av, *bv;
  PetscBLASInt                     cm, cn, ck, alda, blda = 0, clda;
  MPI_Request                      reqs[2];
  const PetscInt                  *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (MatProductCtx_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send it in place without packing */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack local B column-by-column into a contiguous buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send "left", receive from "right" */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending; the buffers can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2346 
MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A,Mat B,Mat C)2347 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2348 {
2349   Mat_MPIDense                    *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2350   MatProductCtx_MatTransMultDense *abt;
2351   MPI_Comm                         comm;
2352   PetscMPIInt                      size, ibn;
2353   PetscScalar                     *cv, *sendbuf, *recvbuf;
2354   const PetscScalar               *av, *bv;
2355   PetscInt                         blda, i, cK = A->cmap->N, k, j, bn;
2356   PetscScalar                      _DOne = 1.0, _DZero = 0.0;
2357   PetscBLASInt                     cm, cn, ck, alda, clda;
2358 
2359   PetscFunctionBegin;
2360   MatCheckProduct(C, 3);
2361   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2362   abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2363   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2364   PetscCallMPI(MPI_Comm_size(comm, &size));
2365   PetscCall(MatDenseGetArrayRead(a->A, &av));
2366   PetscCall(MatDenseGetArrayRead(b->A, &bv));
2367   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2368   PetscCall(MatDenseGetLDA(a->A, &i));
2369   PetscCall(PetscBLASIntCast(i, &alda));
2370   PetscCall(MatDenseGetLDA(b->A, &blda));
2371   PetscCall(MatDenseGetLDA(c->A, &i));
2372   PetscCall(PetscBLASIntCast(i, &clda));
2373   /* copy transpose of B into buf[0] */
2374   bn      = B->rmap->n;
2375   sendbuf = abt->buf[0];
2376   recvbuf = abt->buf[1];
2377   for (k = 0, j = 0; j < bn; j++) {
2378     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2379   }
2380   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2381   PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2382   PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2383   PetscCall(PetscBLASIntCast(cK, &ck));
2384   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2385   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2386   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2387   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2388   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2389   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2390   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2391   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2392   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2393   PetscFunctionReturn(PETSC_SUCCESS);
2394 }
2395 
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)2396 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2397 {
2398   MatProductCtx_MatTransMultDense *abt;
2399 
2400   PetscFunctionBegin;
2401   MatCheckProduct(C, 3);
2402   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2403   abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2404   switch (abt->alg) {
2405   case 1:
2406     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2407     break;
2408   default:
2409     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2410     break;
2411   }
2412   PetscFunctionReturn(PETSC_SUCCESS);
2413 }
2414 
MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense(PetscCtxRt data)2415 static PetscErrorCode MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense(PetscCtxRt data)
2416 {
2417   MatProductCtx_MatMultDense *ab = *(MatProductCtx_MatMultDense **)data;
2418 
2419   PetscFunctionBegin;
2420   PetscCall(MatDestroy(&ab->Ce));
2421   PetscCall(MatDestroy(&ab->Ae));
2422   PetscCall(MatDestroy(&ab->Be));
2423   PetscCall(PetscFree(ab));
2424   PetscFunctionReturn(PETSC_SUCCESS);
2425 }
2426 
/* Numeric phase of C = A * B for MPI dense matrices.
   Either delegates to Elemental (when the symbolic phase created Ae/Ce), or
   broadcasts B to every rank through a->Mvctx and performs a local GEMM. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  MatProductCtx_MatMultDense *ab;
  Mat_MPIDense               *mdn = (Mat_MPIDense *)A->data;
  Mat_MPIDense               *b   = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (MatProductCtx_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
    /* "elemental" algorithm: convert both operands and multiply in Elemental */
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    MPI_Comm           comm;
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;
    const PetscInt    *ranges;
    PetscMPIInt        size;

    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    comm = PetscObjectComm((PetscObject)B);
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      PetscInt           b_lda, be_lda;
      Mat                b_local  = b->A;
      Mat                b_alloc  = NULL;
      Mat                be_local = ab->Be;
      Mat                be_alloc = NULL;
      PetscMemType       b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype       vector_type;
      PetscScalar       *be_array = NULL;
      PetscMPIInt        rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      /* Be must be contiguous (lda == nrows) to be a single broadcast target */
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      if (rank == 0) {
        /* same contiguity requirement for the broadcast source on the root */
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      /* with more than one column, broadcast whole rows via a contiguous datatype */
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      /* general case: broadcast B column by column through the SF */
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    /* local product: Cloc = Aloc * Be (Be now holds all rows of B) */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2519 
/* Symbolic phase of C = A * B for MPI dense matrices: sizes C and prepares the
   reuse context for either the "petsc" (broadcast + local GEMM) or "elemental"
   algorithm. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat_Product                *product = C->product;
  PetscInt                    alg;
  MatProductCtx_MatMultDense *ab;
  PetscBool                   flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A's column layout must match B's row layout */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  /* 0 = "petsc", 1 = "elemental" */
  PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
  alg = flg ? 0 : 1;

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));

  switch (alg) {
  case 1: /* alg: "elemental" */
#if PetscDefined(HAVE_ELEMENTAL)
    /* create elemental matrices Ae and Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
    PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Ae));
    PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
    PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
    PetscCall(MatSetType(ab->Be, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Be));
    PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

    /* compute symbolic Ce = Ae*Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
    PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
    break;
  default: /* alg: "petsc" — Be is a sequential buffer that will hold all rows of B */
    ab->Ae = NULL;
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
    ab->Ce = NULL;
    break;
  }

  C->product->data       = ab;
  C->product->destroy    = MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2580 
MatProductSetFromOptions_MPIDense_AB(Mat C)2581 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2582 {
2583   Mat_Product *product     = C->product;
2584   const char  *algTypes[2] = {"petsc", "elemental"};
2585   PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2586   PetscBool    flg = PETSC_FALSE;
2587 
2588   PetscFunctionBegin;
2589   /* Set default algorithm */
2590   alg = 0; /* default is PETSc */
2591   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2592   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2593 
2594   /* Get runtime option */
2595   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2596   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2597   PetscOptionsEnd();
2598   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2599 
2600   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2601   C->ops->productsymbolic = MatProductSymbolic_AB;
2602   PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604 
MatProductSetFromOptions_MPIDense_AtB(Mat C)2605 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2606 {
2607   Mat_Product *product = C->product;
2608   Mat          A = product->A, B = product->B;
2609 
2610   PetscFunctionBegin;
2611   PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2612              A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2613   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2614   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2615   PetscFunctionReturn(PETSC_SUCCESS);
2616 }
2617 
MatProductSetFromOptions_MPIDense_ABt(Mat C)2618 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2619 {
2620   Mat_Product *product     = C->product;
2621   const char  *algTypes[2] = {"allgatherv", "cyclic"};
2622   PetscInt     alg, nalg = 2;
2623   PetscBool    flg = PETSC_FALSE;
2624 
2625   PetscFunctionBegin;
2626   /* Set default algorithm */
2627   alg = 0; /* default is allgatherv */
2628   PetscCall(PetscStrcmp(product->alg, "default", &flg));
2629   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2630 
2631   /* Get runtime option */
2632   if (product->api_user) {
2633     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2634     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2635     PetscOptionsEnd();
2636   } else {
2637     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2638     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2639     PetscOptionsEnd();
2640   }
2641   if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2642 
2643   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2644   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2645   PetscFunctionReturn(PETSC_SUCCESS);
2646 }
2647 
MatProductSetFromOptions_MPIDense(Mat C)2648 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2649 {
2650   Mat_Product *product = C->product;
2651 
2652   PetscFunctionBegin;
2653   switch (product->type) {
2654   case MATPRODUCT_AB:
2655     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2656     break;
2657   case MATPRODUCT_AtB:
2658     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2659     break;
2660   case MATPRODUCT_ABt:
2661     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2662     break;
2663   default:
2664     break;
2665   }
2666   PetscFunctionReturn(PETSC_SUCCESS);
2667 }
2668 
/*
  MatDenseScatter_Private - Communicate the entries of dense matrix X into
  dense matrix Y through the communication graph of a PetscSF, combining
  values according to the given InsertMode.

  The supplied sf describes a single-column exchange; for multi-column
  matrices a strided version of the graph (covering all N columns with the
  matrices' leading dimensions) is built once and cached on the sf itself
  under the name "_MatDenseScatter_StridedSF".

  Input Parameters:
+ sf    - star forest describing the per-column communication pattern
. X     - source dense matrix (accessed read-only)
. mode  - INSERT_VALUES, ADD_VALUES, MAX_VALUES or MIN_VALUES
- smode - SCATTER_FORWARD (broadcast roots of X to leaves of Y) or
          SCATTER_REVERSE (reduce leaves of X onto roots of Y)

  Output Parameter:
. Y     - destination dense matrix, updated in place
*/
PetscErrorCode MatDenseScatter_Private(PetscSF sf, Mat X, Mat Y, InsertMode mode, ScatterMode smode)
{
  const PetscScalar *in;  /* raw (possibly device-resident) array of X */
  PetscScalar       *out; /* raw (possibly device-resident) array of Y */
  PetscSF            vsf; /* strided SF actually used for the communication */
  PetscInt           N, ny, rld, lld;
  PetscMemType       mtype[2]; /* memory types of in ([0]) and out ([1]) */
  MPI_Op             op = MPI_OP_NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(X, MAT_CLASSID, 2);
  PetscValidHeaderSpecific(Y, MAT_CLASSID, 3);
  /* translate the InsertMode into the MPI reduction applied by the SF ops;
     op stays MPI_OP_NULL for any unsupported mode and is rejected below */
  if (mode == INSERT_VALUES) op = MPI_REPLACE;
  else if (mode == ADD_VALUES) op = MPIU_SUM;
  else if (mode == MAX_VALUES) op = MPIU_MAX;
  else if (mode == MIN_VALUES) op = MPIU_MIN;
  PetscCheck(op != MPI_OP_NULL, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in MatDenseScatter_Private()", mode);
  PetscCheck(smode == SCATTER_FORWARD || smode == SCATTER_REVERSE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported ScatterMode %d in MatDenseScatter_Private()", smode);
  /* both matrices must have the same global number of columns */
  PetscCall(MatGetSize(X, NULL, &N));
  PetscCall(MatGetSize(Y, NULL, &ny));
  PetscCheck(N == ny, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_SIZ, "Matrix column sizes must match: %" PetscInt_FMT " != %" PetscInt_FMT, N, ny);
  /* leading dimensions define the column stride on the root (X) and leaf (Y) sides */
  PetscCall(MatDenseGetLDA(X, &rld));
  PetscCall(MatDenseGetLDA(Y, &lld));
  /* get cached or create new strided PetscSF when the number of columns is greater than one */
  if (N > 1) {
    PetscCall(PetscObjectQuery((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject *)&vsf));
    if (vsf) {
      PetscInt nr[2], nl[2];

      /* invalidate the cached SF when its root/leaf counts are not exactly N
         times those of the base sf (i.e. N changed since it was built) */
      PetscCall(PetscSFGetGraph(sf, nr, nl, NULL, NULL));
      PetscCall(PetscSFGetGraph(vsf, nr + 1, nl + 1, NULL, NULL));
      if (N * nr[0] != nr[1] || N * nl[0] != nl[1]) vsf = NULL;
      /* NOTE(review): the cache test keys only on graph sizes; a change in
         rld/lld with unchanged sizes would reuse a stale strided SF -- confirm
         callers cannot trigger that */
    }
    if (!vsf) {
      PetscCall(PetscSFCreateStridedSF(sf, N, rld, lld, &vsf));
      /* cache on the base sf, which then owns the only reference */
      PetscCall(PetscObjectCompose((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject)vsf));
      PetscCall(PetscObjectDereference((PetscObject)vsf));
    }
  } else vsf = sf;
  /* the output array is accessed in read and write mode,
    but write-only in the INSERT_VALUES case could be worth exploring */
  PetscCall(MatDenseGetArrayReadAndMemType(X, &in, &mtype[0]));
  PetscCall(MatDenseGetArrayAndMemType(Y, &out, &mtype[1]));
  if (smode == SCATTER_FORWARD) {
    /* forward: broadcast root values of X into leaf locations of Y */
    PetscCall(PetscSFBcastWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
    PetscCall(PetscSFBcastEnd(vsf, vsf->vscat.unit, in, out, op));
  } else {
    /* reverse: reduce leaf values of X onto root locations of Y */
    PetscCall(PetscSFReduceWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
    PetscCall(PetscSFReduceEnd(vsf, vsf->vscat.unit, in, out, op));
  }
  PetscCall(MatDenseRestoreArrayAndMemType(Y, &out));
  PetscCall(MatDenseRestoreArrayReadAndMemType(X, &in));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2723 }
2724