1 /*
2 Basic functions for basic parallel dense matrices.
3 Portions of this code are under:
4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5 */
6
7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 #include <petsc/private/vecimpl.h>
11 #include <petsc/private/deviceimpl.h>
12 #include <petsc/private/sfimpl.h>
13
14 /*@
15 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
16 matrix that represents the operator. For sequential matrices it returns itself.
17
18 Input Parameter:
19 . A - the sequential or MPI `MATDENSE` matrix
20
21 Output Parameter:
22 . B - the inner matrix
23
24 Level: intermediate
25
26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
27 @*/
MatDenseGetLocalMatrix(Mat A,Mat * B)28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
29 {
30 Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
31 PetscBool flg;
32
33 PetscFunctionBegin;
34 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
35 PetscAssertPointer(B, 2);
36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
37 if (flg) *B = mat->A;
38 else {
39 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
40 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
41 *B = A;
42 }
43 PetscFunctionReturn(PETSC_SUCCESS);
44 }
45
MatCopy_MPIDense(Mat A,Mat B,MatStructure s)46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
47 {
48 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
49 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
50
51 PetscFunctionBegin;
52 PetscCall(MatCopy(Amat->A, Bmat->A, s));
53 PetscFunctionReturn(PETSC_SUCCESS);
54 }
55
MatShift_MPIDense(Mat A,PetscScalar alpha)56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
57 {
58 Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
59 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
60 PetscScalar *v;
61
62 PetscFunctionBegin;
63 PetscCall(MatDenseGetArray(mat->A, &v));
64 PetscCall(MatDenseGetLDA(mat->A, &lda));
65 rend2 = PetscMin(rend, A->cmap->N);
66 if (rend2 > rstart) {
67 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
68 PetscCall(PetscLogFlops(rend2 - rstart));
69 }
70 PetscCall(MatDenseRestoreArray(mat->A, &v));
71 PetscFunctionReturn(PETSC_SUCCESS);
72 }
73
MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
75 {
76 Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
77 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
78
79 PetscFunctionBegin;
80 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
81 lrow = row - rstart;
82 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
83 PetscFunctionReturn(PETSC_SUCCESS);
84 }
85
MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
87 {
88 Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
89 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
90
91 PetscFunctionBegin;
92 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
93 lrow = row - rstart;
94 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
95 PetscFunctionReturn(PETSC_SUCCESS);
96 }
97
MatGetDiagonalBlock_MPIDense(Mat A,Mat * a)98 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
99 {
100 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
101 PetscInt m = A->rmap->n, rstart = A->rmap->rstart;
102 PetscScalar *array;
103 MPI_Comm comm;
104 PetscBool flg;
105 Mat B;
106
107 PetscFunctionBegin;
108 PetscCall(MatHasCongruentLayouts(A, &flg));
109 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
110 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
111 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
112 #if PetscDefined(HAVE_CUDA)
113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
115 #elif PetscDefined(HAVE_HIP)
116 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
117 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
118 #endif
119 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
120 PetscCall(MatCreate(comm, &B));
121 PetscCall(MatSetSizes(B, m, m, m, m));
122 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
123 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
124 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
125 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
126 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
127 *a = B;
128 PetscCall(MatDestroy(&B));
129 } else *a = B;
130 PetscFunctionReturn(PETSC_SUCCESS);
131 }
132
MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)133 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
134 {
135 Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
136 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
137 PetscBool roworiented = A->roworiented;
138
139 PetscFunctionBegin;
140 for (i = 0; i < m; i++) {
141 if (idxm[i] < 0) continue;
142 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
143 if (idxm[i] >= rstart && idxm[i] < rend) {
144 row = idxm[i] - rstart;
145 if (roworiented) {
146 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
147 } else {
148 for (j = 0; j < n; j++) {
149 if (idxn[j] < 0) continue;
150 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
151 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
152 }
153 }
154 } else if (!A->donotstash) {
155 mat->assembled = PETSC_FALSE;
156 if (roworiented) {
157 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
158 } else {
159 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
160 }
161 }
162 }
163 PetscFunctionReturn(PETSC_SUCCESS);
164 }
165
MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
167 {
168 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
169 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
170
171 PetscFunctionBegin;
172 for (i = 0; i < m; i++) {
173 if (idxm[i] < 0) continue; /* negative row */
174 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
175 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
176 row = idxm[i] - rstart;
177 for (j = 0; j < n; j++) {
178 if (idxn[j] < 0) continue; /* negative column */
179 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
180 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
181 }
182 }
183 PetscFunctionReturn(PETSC_SUCCESS);
184 }
185
MatDenseGetLDA_MPIDense(Mat A,PetscInt * lda)186 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
187 {
188 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
189
190 PetscFunctionBegin;
191 PetscCall(MatDenseGetLDA(a->A, lda));
192 PetscFunctionReturn(PETSC_SUCCESS);
193 }
194
MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)195 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
196 {
197 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
198 MatType mtype = MATSEQDENSE;
199
200 PetscFunctionBegin;
201 if (!a->A) {
202 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
203 PetscCall(PetscLayoutSetUp(A->rmap));
204 PetscCall(PetscLayoutSetUp(A->cmap));
205 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
206 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
207 #if PetscDefined(HAVE_CUDA)
208 PetscBool iscuda;
209 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
210 if (iscuda) mtype = MATSEQDENSECUDA;
211 #elif PetscDefined(HAVE_HIP)
212 PetscBool iship;
213 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
214 if (iship) mtype = MATSEQDENSEHIP;
215 #endif
216 PetscCall(MatSetType(a->A, mtype));
217 }
218 PetscCall(MatDenseSetLDA(a->A, lda));
219 PetscFunctionReturn(PETSC_SUCCESS);
220 }
221
MatDenseGetArray_MPIDense(Mat A,PetscScalar ** array)222 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
223 {
224 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
225
226 PetscFunctionBegin;
227 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
228 PetscCall(MatDenseGetArray(a->A, array));
229 PetscFunctionReturn(PETSC_SUCCESS);
230 }
231
MatDenseGetArrayRead_MPIDense(Mat A,PetscScalar ** array)232 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
233 {
234 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
235
236 PetscFunctionBegin;
237 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
238 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
239 PetscFunctionReturn(PETSC_SUCCESS);
240 }
241
MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar ** array)242 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
243 {
244 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
245
246 PetscFunctionBegin;
247 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
248 PetscCall(MatDenseGetArrayWrite(a->A, array));
249 PetscFunctionReturn(PETSC_SUCCESS);
250 }
251
MatDensePlaceArray_MPIDense(Mat A,const PetscScalar * array)252 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
253 {
254 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
255
256 PetscFunctionBegin;
257 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
258 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
259 PetscCall(MatDensePlaceArray(a->A, array));
260 PetscFunctionReturn(PETSC_SUCCESS);
261 }
262
MatDenseResetArray_MPIDense(Mat A)263 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
264 {
265 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
266
267 PetscFunctionBegin;
268 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
269 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
270 PetscCall(MatDenseResetArray(a->A));
271 PetscFunctionReturn(PETSC_SUCCESS);
272 }
273
MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar * array)274 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
275 {
276 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
277
278 PetscFunctionBegin;
279 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
280 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
281 PetscCall(MatDenseReplaceArray(a->A, array));
282 PetscFunctionReturn(PETSC_SUCCESS);
283 }
284
MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat * B)285 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
286 {
287 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd;
288 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
289 const PetscInt *irow, *icol;
290 const PetscScalar *v;
291 PetscScalar *bv;
292 Mat newmat;
293 IS iscol_local;
294 MPI_Comm comm_is, comm_mat;
295
296 PetscFunctionBegin;
297 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
298 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
299 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
300
301 PetscCall(ISAllGather(iscol, &iscol_local));
302 PetscCall(ISGetIndices(isrow, &irow));
303 PetscCall(ISGetIndices(iscol_local, &icol));
304 PetscCall(ISGetLocalSize(isrow, &nrows));
305 PetscCall(ISGetLocalSize(iscol, &ncols));
306 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
307
308 /* No parallel redistribution currently supported! Should really check each index set
309 to confirm that it is OK. ... Currently supports only submatrix same partitioning as
310 original matrix! */
311
312 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
313 PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
314
315 /* Check submatrix call */
316 if (scall == MAT_REUSE_MATRIX) {
317 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
318 /* Really need to test rows and column sizes! */
319 newmat = *B;
320 } else {
321 /* Create and fill new matrix */
322 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
323 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
324 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
325 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
326 }
327
328 /* Now extract the data pointers and do the copy, column at a time */
329 newmatd = (Mat_MPIDense *)newmat->data;
330 PetscCall(MatDenseGetArray(newmatd->A, &bv));
331 PetscCall(MatDenseGetArrayRead(mat->A, &v));
332 PetscCall(MatDenseGetLDA(mat->A, &lda));
333 for (i = 0; i < Ncols; i++) {
334 const PetscScalar *av = v + lda * icol[i];
335 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
336 }
337 PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
338 PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
339
340 /* Assemble the matrices so that the correct flags are set */
341 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
342 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
343
344 /* Free work space */
345 PetscCall(ISRestoreIndices(isrow, &irow));
346 PetscCall(ISRestoreIndices(iscol_local, &icol));
347 PetscCall(ISDestroy(&iscol_local));
348 *B = newmat;
349 PetscFunctionReturn(PETSC_SUCCESS);
350 }
351
MatDenseRestoreArray_MPIDense(Mat A,PetscScalar ** array)352 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
353 {
354 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
355
356 PetscFunctionBegin;
357 PetscCall(MatDenseRestoreArray(a->A, array));
358 PetscFunctionReturn(PETSC_SUCCESS);
359 }
360
MatDenseRestoreArrayRead_MPIDense(Mat A,PetscScalar ** array)361 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
362 {
363 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
364
365 PetscFunctionBegin;
366 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
367 PetscFunctionReturn(PETSC_SUCCESS);
368 }
369
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar ** array)370 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
371 {
372 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
373
374 PetscFunctionBegin;
375 PetscCall(MatDenseRestoreArrayWrite(a->A, array));
376 PetscFunctionReturn(PETSC_SUCCESS);
377 }
378
MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)379 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
380 {
381 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
382 PetscInt nstash, reallocs;
383
384 PetscFunctionBegin;
385 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
386
387 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
388 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
389 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
390 PetscFunctionReturn(PETSC_SUCCESS);
391 }
392
MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)393 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
394 {
395 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
396 PetscInt i, *row, *col, flg, j, rstart, ncols;
397 PetscMPIInt n;
398 PetscScalar *val;
399
400 PetscFunctionBegin;
401 if (!mdn->donotstash && !mat->nooffprocentries) {
402 /* wait on receives */
403 while (1) {
404 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
405 if (!flg) break;
406
407 for (i = 0; i < n;) {
408 /* Now identify the consecutive vals belonging to the same row */
409 for (j = i, rstart = row[j]; j < n; j++) {
410 if (row[j] != rstart) break;
411 }
412 if (j < n) ncols = j - i;
413 else ncols = n - i;
414 /* Now assemble all these values with a single function call */
415 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
416 i = j;
417 }
418 }
419 PetscCall(MatStashScatterEnd_Private(&mat->stash));
420 }
421
422 PetscCall(MatAssemblyBegin(mdn->A, mode));
423 PetscCall(MatAssemblyEnd(mdn->A, mode));
424 PetscFunctionReturn(PETSC_SUCCESS);
425 }
426
MatZeroEntries_MPIDense(Mat A)427 static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
428 {
429 Mat_MPIDense *l = (Mat_MPIDense *)A->data;
430
431 PetscFunctionBegin;
432 PetscCall(MatZeroEntries(l->A));
433 PetscFunctionReturn(PETSC_SUCCESS);
434 }
435
MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)436 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
437 {
438 Mat_MPIDense *l = (Mat_MPIDense *)A->data;
439 PetscInt i, len, *lrows;
440
441 PetscFunctionBegin;
442 /* get locally owned rows */
443 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
444 /* fix right-hand side if needed */
445 if (x && b) {
446 const PetscScalar *xx;
447 PetscScalar *bb;
448
449 PetscCall(VecGetArrayRead(x, &xx));
450 PetscCall(VecGetArrayWrite(b, &bb));
451 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
452 PetscCall(VecRestoreArrayRead(x, &xx));
453 PetscCall(VecRestoreArrayWrite(b, &bb));
454 }
455 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
456 if (diag != 0.0) {
457 Vec d;
458
459 PetscCall(MatCreateVecs(A, NULL, &d));
460 PetscCall(VecSet(d, diag));
461 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
462 PetscCall(VecDestroy(&d));
463 }
464 PetscCall(PetscFree(lrows));
465 PetscFunctionReturn(PETSC_SUCCESS);
466 }
467
468 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
469 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
470 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
471 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
472
MatMultColumnRange_MPIDense(Mat mat,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end)473 static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
474 {
475 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
476 const PetscScalar *ax;
477 PetscScalar *ay;
478 PetscMemType axmtype, aymtype;
479
480 PetscFunctionBegin;
481 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
482 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
483 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
484 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
485 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
486 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
487 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
488 PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
489 PetscFunctionReturn(PETSC_SUCCESS);
490 }
491
MatMult_MPIDense(Mat mat,Vec xx,Vec yy)492 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
493 {
494 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
495 const PetscScalar *ax;
496 PetscScalar *ay;
497 PetscMemType axmtype, aymtype;
498
499 PetscFunctionBegin;
500 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
501 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
502 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
503 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
504 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
505 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
506 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
507 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
508 PetscFunctionReturn(PETSC_SUCCESS);
509 }
510
MatMultAddColumnRange_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz,PetscInt c_start,PetscInt c_end)511 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
512 {
513 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
514 const PetscScalar *ax;
515 PetscScalar *ay;
516 PetscMemType axmtype, aymtype;
517
518 PetscFunctionBegin;
519 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
520 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
521 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
522 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
523 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
524 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
525 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
526 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
527 PetscFunctionReturn(PETSC_SUCCESS);
528 }
529
MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)530 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
531 {
532 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
533 const PetscScalar *ax;
534 PetscScalar *ay;
535 PetscMemType axmtype, aymtype;
536
537 PetscFunctionBegin;
538 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
539 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
540 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
541 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
542 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
543 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
544 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
545 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
546 PetscFunctionReturn(PETSC_SUCCESS);
547 }
548
MatMultHermitianTransposeColumnRange_MPIDense(Mat A,Vec xx,Vec yy,PetscInt c_start,PetscInt c_end)549 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
550 {
551 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
552 const PetscScalar *ax;
553 PetscScalar *ay;
554 PetscMemType axmtype, aymtype;
555 PetscInt r_start, r_end;
556 PetscInt c_start_local, c_end_local;
557
558 PetscFunctionBegin;
559 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
560 PetscCall(VecZeroEntries(a->lvec));
561 PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
562 c_start_local = PetscMax(c_start, r_start);
563 c_end_local = PetscMin(c_end, r_end);
564 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
565 if (c_end_local > c_start_local) {
566 if (PetscMemTypeHost(aymtype)) {
567 PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
568 } else {
569 PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
570 PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
571 }
572 }
573 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
574 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
575 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
576 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
577 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
578 PetscCall(VecRestoreArrayAndMemType(yy, &ay));
579 PetscFunctionReturn(PETSC_SUCCESS);
580 }
581
MatMultTransposeKernel_MPIDense(Mat A,Vec xx,Vec yy,PetscBool herm)582 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
583 {
584 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
585 const PetscScalar *ax;
586 PetscScalar *ay;
587 PetscMemType axmtype, aymtype;
588
589 PetscFunctionBegin;
590 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
591 PetscCall(VecSet(yy, 0.0));
592 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
593 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
594 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
595 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
596 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
597 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
598 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
599 PetscCall(VecRestoreArrayAndMemType(yy, &ay));
600 PetscFunctionReturn(PETSC_SUCCESS);
601 }
602
MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A,Vec xx,Vec yy,Vec zz,PetscInt c_start,PetscInt c_end)603 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
604 {
605 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
606 const PetscScalar *ax;
607 PetscScalar *ay;
608 PetscMemType axmtype, aymtype;
609
610 PetscFunctionBegin;
611 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
612 PetscCall(VecCopy(yy, zz));
613 PetscCall(VecZeroEntries(a->lvec));
614 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
615 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
616 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
617 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
618 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
619 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
620 PetscCall(VecRestoreArrayAndMemType(zz, &ay));
621 PetscFunctionReturn(PETSC_SUCCESS);
622 }
623
MatMultTransposeAddKernel_MPIDense(Mat A,Vec xx,Vec yy,Vec zz,PetscBool herm)624 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
625 {
626 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
627 const PetscScalar *ax;
628 PetscScalar *ay;
629 PetscMemType axmtype, aymtype;
630
631 PetscFunctionBegin;
632 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
633 PetscCall(VecCopy(yy, zz));
634 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
635 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
636 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
637 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
638 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
639 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
640 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
641 PetscCall(VecRestoreArrayAndMemType(zz, &ay));
642 PetscFunctionReturn(PETSC_SUCCESS);
643 }
644
MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)645 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
646 {
647 PetscFunctionBegin;
648 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
649 PetscFunctionReturn(PETSC_SUCCESS);
650 }
651
MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)652 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
653 {
654 PetscFunctionBegin;
655 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
656 PetscFunctionReturn(PETSC_SUCCESS);
657 }
658
MatMultHermitianTranspose_MPIDense(Mat A,Vec xx,Vec yy)659 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
660 {
661 PetscFunctionBegin;
662 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
663 PetscFunctionReturn(PETSC_SUCCESS);
664 }
665
MatMultHermitianTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)666 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
667 {
668 PetscFunctionBegin;
669 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
670 PetscFunctionReturn(PETSC_SUCCESS);
671 }
672
MatGetDiagonal_MPIDense(Mat A,Vec v)673 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
674 {
675 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
676 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd;
677 PetscScalar *x;
678 const PetscScalar *av;
679
680 PetscFunctionBegin;
681 PetscCall(VecGetArray(v, &x));
682 PetscCall(VecGetSize(v, &ng));
683 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
684 PetscCall(VecGetLocalSize(v, &nl));
685 len = PetscMin(a->A->rmap->n, a->A->cmap->n);
686 radd = A->rmap->rstart * m;
687 PetscCall(MatDenseGetArrayRead(a->A, &av));
688 PetscCall(MatDenseGetLDA(a->A, &lda));
689 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
690 PetscCall(MatDenseRestoreArrayRead(a->A, &av));
691 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
692 PetscCall(VecRestoreArray(v, &x));
693 PetscFunctionReturn(PETSC_SUCCESS);
694 }
695
MatDestroy_MPIDense(Mat mat)696 static PetscErrorCode MatDestroy_MPIDense(Mat mat)
697 {
698 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
699
700 PetscFunctionBegin;
701 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
702 PetscCall(MatStashDestroy_Private(&mat->stash));
703 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
704 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
705 PetscCall(MatDestroy(&mdn->A));
706 PetscCall(VecDestroy(&mdn->lvec));
707 PetscCall(PetscSFDestroy(&mdn->Mvctx));
708 PetscCall(VecDestroy(&mdn->cvec));
709 PetscCall(MatDestroy(&mdn->cmat));
710
711 PetscCall(PetscFree(mat->data));
712 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
713
714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
723 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
727 #if defined(PETSC_HAVE_ELEMENTAL)
728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
729 #endif
730 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
732 #endif
733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
736 #if defined(PETSC_HAVE_CUDA)
737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
755 #endif
756 #if defined(PETSC_HAVE_HIP)
757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
775 #endif
776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
788 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));
790
791 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
792 PetscFunctionReturn(PETSC_SUCCESS);
793 }
794
795 #include <petscdraw.h>
MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)796 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
797 {
798 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
799 PetscMPIInt rank;
800 PetscViewerType vtype;
801 PetscBool isascii, isdraw;
802 PetscViewer sviewer;
803 PetscViewerFormat format;
804
805 PetscFunctionBegin;
806 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
807 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
808 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
809 if (isascii) {
810 PetscCall(PetscViewerGetType(viewer, &vtype));
811 PetscCall(PetscViewerGetFormat(viewer, &format));
812 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
813 MatInfo info;
814 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
815 PetscCall(PetscViewerASCIIPushSynchronized(viewer));
816 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
817 (PetscInt)info.memory));
818 PetscCall(PetscViewerFlush(viewer));
819 PetscCall(PetscViewerASCIIPopSynchronized(viewer));
820 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
821 PetscFunctionReturn(PETSC_SUCCESS);
822 } else if (format == PETSC_VIEWER_ASCII_INFO) {
823 PetscFunctionReturn(PETSC_SUCCESS);
824 }
825 } else if (isdraw) {
826 PetscDraw draw;
827 PetscBool isnull;
828
829 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
830 PetscCall(PetscDrawIsNull(draw, &isnull));
831 if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
832 }
833
834 {
835 /* assemble the entire matrix onto first processor. */
836 Mat A;
837 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
838 PetscInt *cols;
839 PetscScalar *vals;
840
841 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
842 if (rank == 0) {
843 PetscCall(MatSetSizes(A, M, N, M, N));
844 } else {
845 PetscCall(MatSetSizes(A, 0, 0, M, N));
846 }
847 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
848 PetscCall(MatSetType(A, MATMPIDENSE));
849 PetscCall(MatMPIDenseSetPreallocation(A, NULL));
850
851 /* Copy the matrix ... This isn't the most efficient means,
852 but it's quick for now */
853 A->insertmode = INSERT_VALUES;
854
855 row = mat->rmap->rstart;
856 m = mdn->A->rmap->n;
857 for (i = 0; i < m; i++) {
858 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
859 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
860 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
861 row++;
862 }
863
864 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
865 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
866 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
867 if (rank == 0) {
868 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
869 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
870 }
871 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
872 PetscCall(MatDestroy(&A));
873 }
874 PetscFunctionReturn(PETSC_SUCCESS);
875 }
876
MatView_MPIDense(Mat mat,PetscViewer viewer)877 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
878 {
879 PetscBool isascii, isbinary, isdraw, issocket;
880
881 PetscFunctionBegin;
882 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
883 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
884 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
885 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
886
887 if (isascii || issocket || isdraw) PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
888 else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
889 PetscFunctionReturn(PETSC_SUCCESS);
890 }
891
MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo * info)892 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
893 {
894 Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
895 Mat mdn = mat->A;
896 PetscLogDouble isend[5], irecv[5];
897
898 PetscFunctionBegin;
899 info->block_size = 1.0;
900
901 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
902
903 isend[0] = info->nz_used;
904 isend[1] = info->nz_allocated;
905 isend[2] = info->nz_unneeded;
906 isend[3] = info->memory;
907 isend[4] = info->mallocs;
908 if (flag == MAT_LOCAL) {
909 info->nz_used = isend[0];
910 info->nz_allocated = isend[1];
911 info->nz_unneeded = isend[2];
912 info->memory = isend[3];
913 info->mallocs = isend[4];
914 } else if (flag == MAT_GLOBAL_MAX) {
915 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
916
917 info->nz_used = irecv[0];
918 info->nz_allocated = irecv[1];
919 info->nz_unneeded = irecv[2];
920 info->memory = irecv[3];
921 info->mallocs = irecv[4];
922 } else if (flag == MAT_GLOBAL_SUM) {
923 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
924
925 info->nz_used = irecv[0];
926 info->nz_allocated = irecv[1];
927 info->nz_unneeded = irecv[2];
928 info->memory = irecv[3];
929 info->mallocs = irecv[4];
930 }
931 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
932 info->fill_ratio_needed = 0;
933 info->factor_mallocs = 0;
934 PetscFunctionReturn(PETSC_SUCCESS);
935 }
936
MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)937 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
938 {
939 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
940
941 PetscFunctionBegin;
942 switch (op) {
943 case MAT_NEW_NONZERO_LOCATIONS:
944 case MAT_NEW_NONZERO_LOCATION_ERR:
945 case MAT_NEW_NONZERO_ALLOCATION_ERR:
946 MatCheckPreallocated(A, 1);
947 PetscCall(MatSetOption(a->A, op, flg));
948 break;
949 case MAT_ROW_ORIENTED:
950 MatCheckPreallocated(A, 1);
951 a->roworiented = flg;
952 PetscCall(MatSetOption(a->A, op, flg));
953 break;
954 case MAT_IGNORE_OFF_PROC_ENTRIES:
955 a->donotstash = flg;
956 break;
957 case MAT_SYMMETRIC:
958 case MAT_STRUCTURALLY_SYMMETRIC:
959 case MAT_HERMITIAN:
960 case MAT_SYMMETRY_ETERNAL:
961 case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
962 case MAT_SPD:
963 case MAT_SPD_ETERNAL:
964 /* if the diagonal matrix is square it inherits some of the properties above */
965 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
966 break;
967 default:
968 break;
969 }
970 PetscFunctionReturn(PETSC_SUCCESS);
971 }
972
MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)973 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
974 {
975 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
976 const PetscScalar *l;
977 PetscScalar x, *v, *vv, *r;
978 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
979
980 PetscFunctionBegin;
981 PetscCall(MatDenseGetArray(mdn->A, &vv));
982 PetscCall(MatDenseGetLDA(mdn->A, &lda));
983 PetscCall(MatGetLocalSize(A, &s2, &s3));
984 if (ll) {
985 PetscCall(VecGetLocalSize(ll, &s2a));
986 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
987 PetscCall(VecGetArrayRead(ll, &l));
988 for (i = 0; i < m; i++) {
989 x = l[i];
990 v = vv + i;
991 for (j = 0; j < n; j++) {
992 (*v) *= x;
993 v += lda;
994 }
995 }
996 PetscCall(VecRestoreArrayRead(ll, &l));
997 PetscCall(PetscLogFlops(1.0 * n * m));
998 }
999 if (rr) {
1000 const PetscScalar *ar;
1001
1002 PetscCall(VecGetLocalSize(rr, &s3a));
1003 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
1004 PetscCall(VecGetArrayRead(rr, &ar));
1005 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
1006 PetscCall(VecGetArray(mdn->lvec, &r));
1007 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
1008 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
1009 PetscCall(VecRestoreArrayRead(rr, &ar));
1010 for (i = 0; i < n; i++) {
1011 x = r[i];
1012 v = vv + i * lda;
1013 for (j = 0; j < m; j++) (*v++) *= x;
1014 }
1015 PetscCall(VecRestoreArray(mdn->lvec, &r));
1016 PetscCall(PetscLogFlops(1.0 * n * m));
1017 }
1018 PetscCall(MatDenseRestoreArray(mdn->A, &vv));
1019 PetscFunctionReturn(PETSC_SUCCESS);
1020 }
1021
MatNorm_MPIDense(Mat A,NormType type,PetscReal * nrm)1022 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
1023 {
1024 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
1025 PetscInt i, j;
1026 PetscMPIInt size;
1027 PetscReal sum = 0.0;
1028 const PetscScalar *av, *v;
1029
1030 PetscFunctionBegin;
1031 PetscCall(MatDenseGetArrayRead(mdn->A, &av));
1032 v = av;
1033 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1034 if (size == 1) {
1035 PetscCall(MatNorm(mdn->A, type, nrm));
1036 } else {
1037 if (type == NORM_FROBENIUS) {
1038 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
1039 sum += PetscRealPart(PetscConj(*v) * (*v));
1040 v++;
1041 }
1042 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1043 *nrm = PetscSqrtReal(*nrm);
1044 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
1045 } else if (type == NORM_1) {
1046 PetscReal *tmp;
1047
1048 PetscCall(PetscCalloc1(A->cmap->N, &tmp));
1049 *nrm = 0.0;
1050 v = av;
1051 for (j = 0; j < mdn->A->cmap->n; j++) {
1052 for (i = 0; i < mdn->A->rmap->n; i++) {
1053 tmp[j] += PetscAbsScalar(*v);
1054 v++;
1055 }
1056 }
1057 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
1058 for (j = 0; j < A->cmap->N; j++) {
1059 if (tmp[j] > *nrm) *nrm = tmp[j];
1060 }
1061 PetscCall(PetscFree(tmp));
1062 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
1063 } else if (type == NORM_INFINITY) { /* max row norm */
1064 PetscCall(MatNorm(mdn->A, type, nrm));
1065 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
1066 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
1067 }
1068 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
1069 PetscFunctionReturn(PETSC_SUCCESS);
1070 }
1071
MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat * matout)1072 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
1073 {
1074 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1075 Mat B;
1076 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
1077 PetscInt j, i, lda;
1078 PetscScalar *v;
1079
1080 PetscFunctionBegin;
1081 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1082 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1083 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1084 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1085 PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1086 PetscCall(MatMPIDenseSetPreallocation(B, NULL));
1087 } else B = *matout;
1088
1089 m = a->A->rmap->n;
1090 n = a->A->cmap->n;
1091 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
1092 PetscCall(MatDenseGetLDA(a->A, &lda));
1093 PetscCall(PetscMalloc1(m, &rwork));
1094 for (i = 0; i < m; i++) rwork[i] = rstart + i;
1095 for (j = 0; j < n; j++) {
1096 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
1097 v = PetscSafePointerPlusOffset(v, lda);
1098 }
1099 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
1100 PetscCall(PetscFree(rwork));
1101 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1102 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1103 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1104 *matout = B;
1105 } else {
1106 PetscCall(MatHeaderMerge(A, &B));
1107 }
1108 PetscFunctionReturn(PETSC_SUCCESS);
1109 }
1110
1111 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
1112 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
1113
MatSetUp_MPIDense(Mat A)1114 static PetscErrorCode MatSetUp_MPIDense(Mat A)
1115 {
1116 PetscFunctionBegin;
1117 PetscCall(PetscLayoutSetUp(A->rmap));
1118 PetscCall(PetscLayoutSetUp(A->cmap));
1119 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1120 PetscFunctionReturn(PETSC_SUCCESS);
1121 }
1122
MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)1123 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1124 {
1125 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1126
1127 PetscFunctionBegin;
1128 PetscCall(MatAXPY(A->A, alpha, B->A, str));
1129 PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131
MatConjugate_MPIDense(Mat mat)1132 static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1133 {
1134 Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1135
1136 PetscFunctionBegin;
1137 PetscCall(MatConjugate(a->A));
1138 PetscFunctionReturn(PETSC_SUCCESS);
1139 }
1140
MatRealPart_MPIDense(Mat A)1141 static PetscErrorCode MatRealPart_MPIDense(Mat A)
1142 {
1143 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1144
1145 PetscFunctionBegin;
1146 PetscCall(MatRealPart(a->A));
1147 PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149
MatImaginaryPart_MPIDense(Mat A)1150 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1151 {
1152 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1153
1154 PetscFunctionBegin;
1155 PetscCall(MatImaginaryPart(a->A));
1156 PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158
MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)1159 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1160 {
1161 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1162
1163 PetscFunctionBegin;
1164 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1165 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1166 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1167 PetscFunctionReturn(PETSC_SUCCESS);
1168 }
1169
1170 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1171
MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal * reductions)1172 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1173 {
1174 PetscInt i, m, n;
1175 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1176
1177 PetscFunctionBegin;
1178 PetscCall(MatGetSize(A, &m, &n));
1179 if (type == REDUCTION_MEAN_REALPART) {
1180 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
1181 } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1182 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
1183 } else {
1184 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
1185 }
1186 if (type == NORM_2) {
1187 for (i = 0; i < n; i++) reductions[i] *= reductions[i];
1188 }
1189 if (type == NORM_INFINITY) {
1190 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1191 } else {
1192 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1193 }
1194 if (type == NORM_2) {
1195 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1196 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1197 for (i = 0; i < n; i++) reductions[i] /= m;
1198 }
1199 PetscFunctionReturn(PETSC_SUCCESS);
1200 }
1201
MatSetRandom_MPIDense(Mat x,PetscRandom rctx)1202 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1203 {
1204 Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1205
1206 PetscFunctionBegin;
1207 PetscCall(MatSetRandom(d->A, rctx));
1208 #if defined(PETSC_HAVE_DEVICE)
1209 x->offloadmask = d->A->offloadmask;
1210 #endif
1211 PetscFunctionReturn(PETSC_SUCCESS);
1212 }
1213
1214 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1215 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1216 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1217 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1218 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1219 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1220 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);
1221
1222 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1223 MatGetRow_MPIDense,
1224 MatRestoreRow_MPIDense,
1225 MatMult_MPIDense,
1226 /* 4*/ MatMultAdd_MPIDense,
1227 MatMultTranspose_MPIDense,
1228 MatMultTransposeAdd_MPIDense,
1229 NULL,
1230 NULL,
1231 NULL,
1232 /* 10*/ NULL,
1233 NULL,
1234 NULL,
1235 NULL,
1236 MatTranspose_MPIDense,
1237 /* 15*/ MatGetInfo_MPIDense,
1238 MatEqual_MPIDense,
1239 MatGetDiagonal_MPIDense,
1240 MatDiagonalScale_MPIDense,
1241 MatNorm_MPIDense,
1242 /* 20*/ MatAssemblyBegin_MPIDense,
1243 MatAssemblyEnd_MPIDense,
1244 MatSetOption_MPIDense,
1245 MatZeroEntries_MPIDense,
1246 /* 24*/ MatZeroRows_MPIDense,
1247 NULL,
1248 NULL,
1249 NULL,
1250 NULL,
1251 /* 29*/ MatSetUp_MPIDense,
1252 NULL,
1253 NULL,
1254 MatGetDiagonalBlock_MPIDense,
1255 NULL,
1256 /* 34*/ MatDuplicate_MPIDense,
1257 NULL,
1258 NULL,
1259 NULL,
1260 NULL,
1261 /* 39*/ MatAXPY_MPIDense,
1262 MatCreateSubMatrices_MPIDense,
1263 NULL,
1264 MatGetValues_MPIDense,
1265 MatCopy_MPIDense,
1266 /* 44*/ NULL,
1267 MatScale_MPIDense,
1268 MatShift_MPIDense,
1269 NULL,
1270 NULL,
1271 /* 49*/ MatSetRandom_MPIDense,
1272 NULL,
1273 NULL,
1274 NULL,
1275 NULL,
1276 /* 54*/ NULL,
1277 NULL,
1278 NULL,
1279 NULL,
1280 NULL,
1281 /* 59*/ MatCreateSubMatrix_MPIDense,
1282 MatDestroy_MPIDense,
1283 MatView_MPIDense,
1284 NULL,
1285 NULL,
1286 /* 64*/ NULL,
1287 NULL,
1288 NULL,
1289 NULL,
1290 NULL,
1291 /* 69*/ NULL,
1292 NULL,
1293 NULL,
1294 NULL,
1295 NULL,
1296 /* 74*/ NULL,
1297 NULL,
1298 NULL,
1299 NULL,
1300 MatLoad_MPIDense,
1301 /* 79*/ NULL,
1302 NULL,
1303 NULL,
1304 NULL,
1305 /* 83*/ NULL,
1306 NULL,
1307 NULL,
1308 NULL,
1309 MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1310 MatMatTransposeMultNumeric_MPIDense_MPIDense,
1311 /* 89*/ NULL,
1312 MatProductSetFromOptions_MPIDense,
1313 NULL,
1314 NULL,
1315 MatConjugate_MPIDense,
1316 /* 94*/ NULL,
1317 NULL,
1318 MatRealPart_MPIDense,
1319 MatImaginaryPart_MPIDense,
1320 NULL,
1321 /*99*/ NULL,
1322 NULL,
1323 NULL,
1324 NULL,
1325 MatGetColumnVector_MPIDense,
1326 /*104*/ NULL,
1327 NULL,
1328 NULL,
1329 NULL,
1330 NULL,
1331 /*109*/ NULL,
1332 NULL,
1333 MatMultHermitianTranspose_MPIDense,
1334 MatMultHermitianTransposeAdd_MPIDense,
1335 NULL,
1336 /*114*/ NULL,
1337 MatGetColumnReductions_MPIDense,
1338 NULL,
1339 NULL,
1340 NULL,
1341 /*120*/ NULL,
1342 MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1343 MatTransposeMatMultNumeric_MPIDense_MPIDense,
1344 NULL,
1345 /*124*/ NULL,
1346 NULL,
1347 NULL,
1348 NULL,
1349 NULL,
1350 /*129*/ NULL,
1351 NULL,
1352 MatCreateMPIMatConcatenateSeqMat_MPIDense,
1353 NULL,
1354 NULL,
1355 /*134*/ NULL,
1356 NULL,
1357 NULL,
1358 NULL,
1359 NULL,
1360 /*139*/ NULL,
1361 NULL,
1362 NULL,
1363 NULL,
1364 NULL,
1365 NULL};
1366
MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar * data)1367 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1368 {
1369 Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1370 MatType mtype = MATSEQDENSE;
1371
1372 PetscFunctionBegin;
1373 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1374 PetscCall(PetscLayoutSetUp(mat->rmap));
1375 PetscCall(PetscLayoutSetUp(mat->cmap));
1376 if (!a->A) {
1377 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1378 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1379 }
1380 #if defined(PETSC_HAVE_CUDA)
1381 PetscBool iscuda;
1382 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1383 if (iscuda) mtype = MATSEQDENSECUDA;
1384 #endif
1385 #if defined(PETSC_HAVE_HIP)
1386 PetscBool iship;
1387 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1388 if (iship) mtype = MATSEQDENSEHIP;
1389 #endif
1390 PetscCall(MatSetType(a->A, mtype));
1391 PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1392 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1393 mat->offloadmask = a->A->offloadmask;
1394 #endif
1395 mat->preallocated = PETSC_TRUE;
1396 mat->assembled = PETSC_TRUE;
1397 PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399
MatConvert_MPIAIJ_MPIDense(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1400 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1401 {
1402 Mat B, C;
1403
1404 PetscFunctionBegin;
1405 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1406 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1407 PetscCall(MatDestroy(&C));
1408 if (reuse == MAT_REUSE_MATRIX) {
1409 C = *newmat;
1410 } else C = NULL;
1411 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1412 PetscCall(MatDestroy(&B));
1413 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
1414 else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1415 PetscFunctionReturn(PETSC_SUCCESS);
1416 }
1417
MatConvert_MPIDense_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1418 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1419 {
1420 Mat B, C;
1421
1422 PetscFunctionBegin;
1423 PetscCall(MatDenseGetLocalMatrix(A, &C));
1424 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1425 if (reuse == MAT_REUSE_MATRIX) {
1426 C = *newmat;
1427 } else C = NULL;
1428 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1429 PetscCall(MatDestroy(&B));
1430 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
1431 else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1432 PetscFunctionReturn(PETSC_SUCCESS);
1433 }
1434
1435 #if defined(PETSC_HAVE_ELEMENTAL)
MatConvert_MPIDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1436 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1437 {
1438 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1439 Mat mat_elemental;
1440 PetscScalar *v;
1441 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1442
1443 PetscFunctionBegin;
1444 if (reuse == MAT_REUSE_MATRIX) {
1445 mat_elemental = *newmat;
1446 PetscCall(MatZeroEntries(*newmat));
1447 } else {
1448 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1449 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1450 PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1451 PetscCall(MatSetUp(mat_elemental));
1452 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1453 }
1454
1455 PetscCall(PetscMalloc2(m, &rows, N, &cols));
1456 for (i = 0; i < N; i++) cols[i] = i;
1457 for (i = 0; i < m; i++) rows[i] = rstart + i;
1458
1459 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1460 PetscCall(MatDenseGetArray(A, &v));
1461 PetscCall(MatDenseGetLDA(a->A, &lda));
1462 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1463 else {
1464 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1465 }
1466 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1467 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1468 PetscCall(MatDenseRestoreArray(A, &v));
1469 PetscCall(PetscFree2(rows, cols));
1470
1471 if (reuse == MAT_INPLACE_MATRIX) {
1472 PetscCall(MatHeaderReplace(A, &mat_elemental));
1473 } else {
1474 *newmat = mat_elemental;
1475 }
1476 PetscFunctionReturn(PETSC_SUCCESS);
1477 }
1478 #endif
1479
MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar ** vals)1480 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1481 {
1482 Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1483
1484 PetscFunctionBegin;
1485 PetscCall(MatDenseGetColumn(mat->A, col, vals));
1486 PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488
MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar ** vals)1489 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1490 {
1491 Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1492
1493 PetscFunctionBegin;
1494 PetscCall(MatDenseRestoreColumn(mat->A, vals));
1495 PetscFunctionReturn(PETSC_SUCCESS);
1496 }
1497
MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)1498 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1499 {
1500 Mat_MPIDense *mat;
1501 PetscInt m, nloc, N;
1502
1503 PetscFunctionBegin;
1504 PetscCall(MatGetSize(inmat, &m, &N));
1505 PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1506 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1507 PetscInt sum;
1508
1509 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1510 /* Check sum(n) = N */
1511 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1512 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1513
1514 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1515 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1516 }
1517
1518 /* numeric phase */
1519 mat = (Mat_MPIDense *)(*outmat)->data;
1520 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1521 PetscFunctionReturn(PETSC_SUCCESS);
1522 }
1523
MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1524 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1525 {
1526 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1527 PetscInt lda;
1528
1529 PetscFunctionBegin;
1530 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1531 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1532 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1533 a->vecinuse = col + 1;
1534 PetscCall(MatDenseGetLDA(a->A, &lda));
1535 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1536 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1537 *v = a->cvec;
1538 PetscFunctionReturn(PETSC_SUCCESS);
1539 }
1540
MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1541 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1542 {
1543 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1544
1545 PetscFunctionBegin;
1546 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1547 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1548 VecCheckAssembled(a->cvec);
1549 a->vecinuse = 0;
1550 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1551 PetscCall(VecResetArray(a->cvec));
1552 if (v) *v = NULL;
1553 PetscFunctionReturn(PETSC_SUCCESS);
1554 }
1555
MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec * v)1556 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1557 {
1558 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1559 PetscInt lda;
1560
1561 PetscFunctionBegin;
1562 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1563 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1564 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1565 a->vecinuse = col + 1;
1566 PetscCall(MatDenseGetLDA(a->A, &lda));
1567 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1568 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1569 PetscCall(VecLockReadPush(a->cvec));
1570 *v = a->cvec;
1571 PetscFunctionReturn(PETSC_SUCCESS);
1572 }
1573
MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec * v)1574 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1575 {
1576 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1577
1578 PetscFunctionBegin;
1579 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1580 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1581 VecCheckAssembled(a->cvec);
1582 a->vecinuse = 0;
1583 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1584 PetscCall(VecLockReadPop(a->cvec));
1585 PetscCall(VecResetArray(a->cvec));
1586 if (v) *v = NULL;
1587 PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589
MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec * v)1590 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1591 {
1592 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1593 PetscInt lda;
1594
1595 PetscFunctionBegin;
1596 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1597 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1598 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1599 a->vecinuse = col + 1;
1600 PetscCall(MatDenseGetLDA(a->A, &lda));
1601 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1602 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
1603 *v = a->cvec;
1604 PetscFunctionReturn(PETSC_SUCCESS);
1605 }
1606
MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec * v)1607 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1608 {
1609 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1610
1611 PetscFunctionBegin;
1612 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1613 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1614 VecCheckAssembled(a->cvec);
1615 a->vecinuse = 0;
1616 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1617 PetscCall(VecResetArray(a->cvec));
1618 if (v) *v = NULL;
1619 PetscFunctionReturn(PETSC_SUCCESS);
1620 }
1621
MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat * v)1622 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1623 {
1624 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1625 Mat_MPIDense *c;
1626 MPI_Comm comm;
1627 PetscInt prbegin, prend, pcbegin, pcend;
1628
1629 PetscFunctionBegin;
1630 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1631 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1632 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1633 prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1634 prend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1635 pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
1636 pcend = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
1637 if (!a->cmat) {
1638 PetscCall(MatCreate(comm, &a->cmat));
1639 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1640 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1641 else {
1642 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
1643 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1644 PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1645 }
1646 if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
1647 else {
1648 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
1649 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1650 PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1651 }
1652 c = (Mat_MPIDense *)a->cmat->data;
1653 c->sub_rbegin = rbegin;
1654 c->sub_rend = rend;
1655 c->sub_cbegin = cbegin;
1656 c->sub_cend = cend;
1657 }
1658 c = (Mat_MPIDense *)a->cmat->data;
1659 if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
1660 PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1661 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1662 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
1663 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1664 PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1665 c->sub_rbegin = rbegin;
1666 c->sub_rend = rend;
1667 }
1668 if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
1669 // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
1670 if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
1671 PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1672 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1673 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
1674 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1675 PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1676 PetscCall(VecDestroy(&c->lvec));
1677 PetscCall(PetscSFDestroy(&c->Mvctx));
1678 }
1679 c->sub_cbegin = cbegin;
1680 c->sub_cend = cend;
1681 }
1682 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1683 PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));
1684
1685 a->cmat->preallocated = PETSC_TRUE;
1686 a->cmat->assembled = PETSC_TRUE;
1687 #if defined(PETSC_HAVE_DEVICE)
1688 a->cmat->offloadmask = c->A->offloadmask;
1689 #endif
1690 a->matinuse = cbegin + 1;
1691 *v = a->cmat;
1692 PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694
MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat * v)1695 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1696 {
1697 Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1698 Mat_MPIDense *c;
1699
1700 PetscFunctionBegin;
1701 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1702 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1703 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1704 a->matinuse = 0;
1705 c = (Mat_MPIDense *)a->cmat->data;
1706 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1707 *v = NULL;
1708 #if defined(PETSC_HAVE_DEVICE)
1709 A->offloadmask = a->A->offloadmask;
1710 #endif
1711 PetscFunctionReturn(PETSC_SUCCESS);
1712 }
1713
1714 /*MC
1715 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1716
1717 Options Database Key:
1718 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1719
1720 Level: beginner
1721
1722 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1723 M*/
MatCreate_MPIDense(Mat mat)1724 PetscErrorCode MatCreate_MPIDense(Mat mat)
1725 {
1726 Mat_MPIDense *a;
1727
1728 PetscFunctionBegin;
1729 PetscCall(PetscNew(&a));
1730 mat->data = (void *)a;
1731 mat->ops[0] = MatOps_Values;
1732
1733 mat->insertmode = NOT_SET_VALUES;
1734
1735 /* build cache for off array entries formed */
1736 a->donotstash = PETSC_FALSE;
1737
1738 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1739
1740 /* stuff used for matrix vector multiply */
1741 a->lvec = NULL;
1742 a->Mvctx = NULL;
1743 a->roworiented = PETSC_TRUE;
1744
1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1766 #if defined(PETSC_HAVE_ELEMENTAL)
1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1768 #endif
1769 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1771 #endif
1772 #if defined(PETSC_HAVE_CUDA)
1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1774 #endif
1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1778 #if defined(PETSC_HAVE_CUDA)
1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1781 #endif
1782 #if defined(PETSC_HAVE_HIP)
1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1786 #endif
1787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1788 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
1790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
1791 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
1792 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
1793 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1794 PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796
1797 /*MC
1798 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1799
1800 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1801 and `MATMPIDENSE` otherwise.
1802
1803 Options Database Key:
1804 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1805
1806 Level: beginner
1807
1808 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1809 M*/
1810
1811 /*@
1812 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1813
1814 Collective
1815
1816 Input Parameters:
1817 + B - the matrix
1818 - data - optional location of matrix data. Set to `NULL` for PETSc
1819 to control all matrix memory allocation.
1820
1821 Level: intermediate
1822
1823 Notes:
1824 The dense format is fully compatible with standard Fortran
1825 storage by columns.
1826
1827 The data input variable is intended primarily for Fortran programmers
1828 who wish to allocate their own matrix memory space. Most users should
1829 set `data` to `NULL`.
1830
1831 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1832 @*/
MatMPIDenseSetPreallocation(Mat B,PetscScalar * data)1833 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1834 {
1835 PetscFunctionBegin;
1836 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
1837 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1838 PetscFunctionReturn(PETSC_SUCCESS);
1839 }
1840
1841 /*@
1842 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1843 array provided by the user. This is useful to avoid copying an array
1844 into a matrix
1845
1846 Not Collective
1847
1848 Input Parameters:
1849 + mat - the matrix
1850 - array - the array in column major order
1851
1852 Level: developer
1853
1854 Note:
1855 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1856
1857 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1858 freed when the matrix is destroyed.
1859
1860 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1861 `MatDenseReplaceArray()`
1862 @*/
MatDensePlaceArray(Mat mat,const PetscScalar * array)1863 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1864 {
1865 PetscFunctionBegin;
1866 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1867 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1868 PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1869 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1870 mat->offloadmask = PETSC_OFFLOAD_CPU;
1871 #endif
1872 PetscFunctionReturn(PETSC_SUCCESS);
1873 }
1874
1875 /*@
1876 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`
1877
1878 Not Collective
1879
1880 Input Parameter:
1881 . mat - the matrix
1882
1883 Level: developer
1884
1885 Note:
1886 You can only call this after a call to `MatDensePlaceArray()`
1887
1888 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1889 @*/
MatDenseResetArray(Mat mat)1890 PetscErrorCode MatDenseResetArray(Mat mat)
1891 {
1892 PetscFunctionBegin;
1893 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1894 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1895 PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1896 PetscFunctionReturn(PETSC_SUCCESS);
1897 }
1898
1899 /*@
1900 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1901 array provided by the user. This is useful to avoid copying an array
1902 into a matrix
1903
1904 Not Collective
1905
1906 Input Parameters:
1907 + mat - the matrix
1908 - array - the array in column major order
1909
1910 Level: developer
1911
1912 Note:
1913 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
1914
1915 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1916 freed by the user. It will be freed when the matrix is destroyed.
1917
1918 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1919 @*/
MatDenseReplaceArray(Mat mat,const PetscScalar * array)1920 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1921 {
1922 PetscFunctionBegin;
1923 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
1924 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1925 PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1926 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1927 mat->offloadmask = PETSC_OFFLOAD_CPU;
1928 #endif
1929 PetscFunctionReturn(PETSC_SUCCESS);
1930 }
1931
1932 /*@
1933 MatCreateDense - Creates a matrix in `MATDENSE` format.
1934
1935 Collective
1936
1937 Input Parameters:
1938 + comm - MPI communicator
1939 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1940 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1941 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
1942 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
1943 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
1944 to control all matrix memory allocation.
1945
1946 Output Parameter:
1947 . A - the matrix
1948
1949 Level: intermediate
1950
1951 Notes:
1952 The dense format is fully compatible with standard Fortran
1953 storage by columns.
1954
1955 Although local portions of the matrix are stored in column-major
1956 order, the matrix is partitioned across MPI ranks by row.
1957
1958 The data input variable is intended primarily for Fortran programmers
1959 who wish to allocate their own matrix memory space. Most users should
1960 set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).
1961
1962 The user MUST specify either the local or global matrix dimensions
1963 (possibly both).
1964
1965 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1966 @*/
MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar data[],Mat * A)1967 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
1968 {
1969 PetscFunctionBegin;
1970 PetscCall(MatCreate(comm, A));
1971 PetscCall(MatSetSizes(*A, m, n, M, N));
1972 PetscCall(MatSetType(*A, MATDENSE));
1973 PetscCall(MatSeqDenseSetPreallocation(*A, data));
1974 PetscCall(MatMPIDenseSetPreallocation(*A, data));
1975 PetscFunctionReturn(PETSC_SUCCESS);
1976 }
1977
MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat * newmat)1978 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1979 {
1980 Mat mat;
1981 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1982
1983 PetscFunctionBegin;
1984 *newmat = NULL;
1985 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1986 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1987 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1988 a = (Mat_MPIDense *)mat->data;
1989
1990 mat->factortype = A->factortype;
1991 mat->assembled = PETSC_TRUE;
1992 mat->preallocated = PETSC_TRUE;
1993
1994 mat->insertmode = NOT_SET_VALUES;
1995 a->donotstash = oldmat->donotstash;
1996
1997 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1998 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1999
2000 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2001
2002 *newmat = mat;
2003 PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005
MatLoad_MPIDense(Mat newMat,PetscViewer viewer)2006 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2007 {
2008 PetscBool isbinary;
2009 #if defined(PETSC_HAVE_HDF5)
2010 PetscBool ishdf5;
2011 #endif
2012
2013 PetscFunctionBegin;
2014 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2015 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2016 /* force binary viewer to load .info file if it has not yet done so */
2017 PetscCall(PetscViewerSetUp(viewer));
2018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2019 #if defined(PETSC_HAVE_HDF5)
2020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2021 #endif
2022 if (isbinary) {
2023 PetscCall(MatLoad_Dense_Binary(newMat, viewer));
2024 #if defined(PETSC_HAVE_HDF5)
2025 } else if (ishdf5) {
2026 PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
2027 #endif
2028 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2029 PetscFunctionReturn(PETSC_SUCCESS);
2030 }
2031
MatEqual_MPIDense(Mat A,Mat B,PetscBool * flag)2032 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2033 {
2034 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2035 Mat a, b;
2036
2037 PetscFunctionBegin;
2038 a = matA->A;
2039 b = matB->A;
2040 PetscCall(MatEqual(a, b, flag));
2041 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2042 PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044
MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense(PetscCtxRt data)2045 static PetscErrorCode MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense(PetscCtxRt data)
2046 {
2047 MatProductCtx_TransMatMultDense *atb = *(MatProductCtx_TransMatMultDense **)data;
2048
2049 PetscFunctionBegin;
2050 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
2051 PetscCall(MatDestroy(&atb->atb));
2052 PetscCall(PetscFree(atb));
2053 PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055
MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense(PetscCtxRt data)2056 static PetscErrorCode MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense(PetscCtxRt data)
2057 {
2058 MatProductCtx_MatTransMultDense *abt = *(MatProductCtx_MatTransMultDense **)data;
2059
2060 PetscFunctionBegin;
2061 PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
2062 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
2063 PetscCall(PetscFree(abt));
2064 PetscFunctionReturn(PETSC_SUCCESS);
2065 }
2066
MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)2067 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2068 {
2069 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2070 MatProductCtx_TransMatMultDense *atb;
2071 MPI_Comm comm;
2072 PetscMPIInt size, *recvcounts;
2073 PetscScalar *carray, *sendbuf;
2074 const PetscScalar *atbarray;
2075 PetscInt i, cN = C->cmap->N, proc, k, j, lda;
2076 const PetscInt *ranges;
2077
2078 PetscFunctionBegin;
2079 MatCheckProduct(C, 3);
2080 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2081 atb = (MatProductCtx_TransMatMultDense *)C->product->data;
2082 recvcounts = atb->recvcounts;
2083 sendbuf = atb->sendbuf;
2084
2085 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2086 PetscCallMPI(MPI_Comm_size(comm, &size));
2087
2088 /* compute atbarray = aseq^T * bseq */
2089 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));
2090
2091 PetscCall(MatGetOwnershipRanges(C, &ranges));
2092
2093 if (ranges[1] == C->rmap->N) {
2094 /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
2095 PetscInt atb_lda, c_lda;
2096 Mat atb_local = atb->atb;
2097 Mat atb_alloc = NULL;
2098 Mat c_local = c->A;
2099 Mat c_alloc = NULL;
2100 PetscMemType atb_memtype, c_memtype;
2101 const PetscScalar *atb_array = NULL;
2102 MPI_Datatype vector_type;
2103 PetscScalar *c_array = NULL;
2104 PetscMPIInt rank;
2105
2106 PetscCallMPI(MPI_Comm_rank(comm, &rank));
2107
2108 PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
2109 if (atb_lda != C->rmap->N) {
2110 // copy atb to a matrix that will have lda == the number of rows
2111 PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
2112 PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
2113 atb_local = atb_alloc;
2114 }
2115
2116 if (rank == 0) {
2117 PetscCall(MatDenseGetLDA(c_local, &c_lda));
2118 if (c_lda != C->rmap->N) {
2119 // copy c to a matrix that will have lda == the number of rows
2120 PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
2121 c_local = c_alloc;
2122 }
2123 PetscCall(MatZeroEntries(c_local));
2124 }
2125 /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
2126 * B->cmap->N: use the a->Mvctx to use the best reduction method */
2127 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
2128 vector_type = MPIU_SCALAR;
2129 if (B->cmap->N > 1) {
2130 PetscMPIInt mpi_N;
2131
2132 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
2133 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
2134 PetscCallMPI(MPI_Type_commit(&vector_type));
2135 }
2136 PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
2137 PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
2138 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
2139 PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
2140 PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
2141 PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
2142 if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
2143 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
2144 PetscCall(MatDestroy(&atb_alloc));
2145 PetscCall(MatDestroy(&c_alloc));
2146 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2147 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2148 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2149 PetscFunctionReturn(PETSC_SUCCESS);
2150 }
2151
2152 /* arrange atbarray into sendbuf */
2153 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
2154 PetscCall(MatDenseGetLDA(atb->atb, &lda));
2155 for (proc = 0, k = 0; proc < size; proc++) {
2156 for (j = 0; j < cN; j++) {
2157 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2158 }
2159 }
2160 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
2161
2162 /* sum all atbarray to local values of C */
2163 PetscCall(MatDenseGetArrayWrite(c->A, &carray));
2164 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
2165 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
2166 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2167 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2168 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2169 PetscFunctionReturn(PETSC_SUCCESS);
2170 }
2171
MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)2172 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2173 {
2174 MPI_Comm comm;
2175 PetscMPIInt size;
2176 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N;
2177 MatProductCtx_TransMatMultDense *atb;
2178 PetscBool cisdense = PETSC_FALSE;
2179 const PetscInt *ranges;
2180
2181 PetscFunctionBegin;
2182 MatCheckProduct(C, 4);
2183 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2184 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2185 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart,
2186 A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2187
2188 /* create matrix product C */
2189 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2190 #if defined(PETSC_HAVE_CUDA)
2191 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2192 #endif
2193 #if defined(PETSC_HAVE_HIP)
2194 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2195 #endif
2196 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2197 PetscCall(MatSetUp(C));
2198
2199 /* create data structure for reuse C */
2200 PetscCallMPI(MPI_Comm_size(comm, &size));
2201 PetscCall(PetscNew(&atb));
2202 cM = C->rmap->N;
2203 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2204 PetscCall(MatGetOwnershipRanges(C, &ranges));
2205 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
2206 C->product->data = atb;
2207 C->product->destroy = MatProductCtxDestroy_MatTransMatMult_MPIDense_MPIDense;
2208 PetscFunctionReturn(PETSC_SUCCESS);
2209 }
2210
MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)2211 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2212 {
2213 MPI_Comm comm;
2214 PetscMPIInt i, size;
2215 PetscInt maxRows, bufsiz;
2216 PetscMPIInt tag;
2217 PetscInt alg;
2218 MatProductCtx_MatTransMultDense *abt;
2219 Mat_Product *product = C->product;
2220 PetscBool flg;
2221
2222 PetscFunctionBegin;
2223 MatCheckProduct(C, 4);
2224 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2225 /* check local size of A and B */
2226 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2227
2228 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2229 alg = flg ? 0 : 1;
2230
2231 /* setup matrix product C */
2232 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2233 PetscCall(MatSetType(C, MATMPIDENSE));
2234 PetscCall(MatSetUp(C));
2235 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2236
2237 /* create data structure for reuse C */
2238 PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2239 PetscCallMPI(MPI_Comm_size(comm, &size));
2240 PetscCall(PetscNew(&abt));
2241 abt->tag = tag;
2242 abt->alg = alg;
2243 switch (alg) {
2244 case 1: /* alg: "cyclic" */
2245 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
2246 bufsiz = A->cmap->N * maxRows;
2247 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
2248 break;
2249 default: /* alg: "allgatherv" */
2250 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
2251 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
2252 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
2253 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
2254 break;
2255 }
2256
2257 C->product->data = abt;
2258 C->product->destroy = MatProductCtxDestroy_MatMatTransMult_MPIDense_MPIDense;
2259 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2260 PetscFunctionReturn(PETSC_SUCCESS);
2261 }
2262
MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A,Mat B,Mat C)2263 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2264 {
2265 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2266 MatProductCtx_MatTransMultDense *abt;
2267 MPI_Comm comm;
2268 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom;
2269 PetscScalar *sendbuf, *recvbuf = NULL, *cv;
2270 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn;
2271 PetscScalar _DOne = 1.0, _DZero = 0.0;
2272 const PetscScalar *av, *bv;
2273 PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
2274 MPI_Request reqs[2];
2275 const PetscInt *ranges;
2276
2277 PetscFunctionBegin;
2278 MatCheckProduct(C, 3);
2279 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2280 abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2281 PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2282 PetscCallMPI(MPI_Comm_rank(comm, &rank));
2283 PetscCallMPI(MPI_Comm_size(comm, &size));
2284 PetscCall(MatDenseGetArrayRead(a->A, &av));
2285 PetscCall(MatDenseGetArrayRead(b->A, &bv));
2286 PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2287 PetscCall(MatDenseGetLDA(a->A, &i));
2288 PetscCall(PetscBLASIntCast(i, &alda));
2289 PetscCall(MatDenseGetLDA(b->A, &i));
2290 PetscCall(PetscBLASIntCast(i, &blda));
2291 PetscCall(MatDenseGetLDA(c->A, &i));
2292 PetscCall(PetscBLASIntCast(i, &clda));
2293 PetscCall(MatGetOwnershipRanges(B, &ranges));
2294 bn = B->rmap->n;
2295 if (blda == bn) {
2296 sendbuf = (PetscScalar *)bv;
2297 } else {
2298 sendbuf = abt->buf[0];
2299 for (k = 0, i = 0; i < cK; i++) {
2300 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2301 }
2302 }
2303 if (size > 1) {
2304 sendto = (rank + size - 1) % size;
2305 recvfrom = (rank + size + 1) % size;
2306 } else {
2307 sendto = recvfrom = 0;
2308 }
2309 PetscCall(PetscBLASIntCast(cK, &ck));
2310 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2311 recvisfrom = rank;
2312 for (i = 0; i < size; i++) {
2313 /* we have finished receiving in sending, bufs can be read/modified */
2314 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2315 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2316
2317 if (nextrecvisfrom != rank) {
2318 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2319 sendsiz = cK * bn;
2320 recvsiz = cK * nextbn;
2321 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2322 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2323 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2324 }
2325
2326 /* local aseq * sendbuf^T */
2327 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2328 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2329
2330 if (nextrecvisfrom != rank) {
2331 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2332 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2333 }
2334 bn = nextbn;
2335 recvisfrom = nextrecvisfrom;
2336 sendbuf = recvbuf;
2337 }
2338 PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2339 PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2340 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2341 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2342 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2343 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2344 PetscFunctionReturn(PETSC_SUCCESS);
2345 }
2346
MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A,Mat B,Mat C)2347 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2348 {
2349 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2350 MatProductCtx_MatTransMultDense *abt;
2351 MPI_Comm comm;
2352 PetscMPIInt size, ibn;
2353 PetscScalar *cv, *sendbuf, *recvbuf;
2354 const PetscScalar *av, *bv;
2355 PetscInt blda, i, cK = A->cmap->N, k, j, bn;
2356 PetscScalar _DOne = 1.0, _DZero = 0.0;
2357 PetscBLASInt cm, cn, ck, alda, clda;
2358
2359 PetscFunctionBegin;
2360 MatCheckProduct(C, 3);
2361 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2362 abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2363 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2364 PetscCallMPI(MPI_Comm_size(comm, &size));
2365 PetscCall(MatDenseGetArrayRead(a->A, &av));
2366 PetscCall(MatDenseGetArrayRead(b->A, &bv));
2367 PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2368 PetscCall(MatDenseGetLDA(a->A, &i));
2369 PetscCall(PetscBLASIntCast(i, &alda));
2370 PetscCall(MatDenseGetLDA(b->A, &blda));
2371 PetscCall(MatDenseGetLDA(c->A, &i));
2372 PetscCall(PetscBLASIntCast(i, &clda));
2373 /* copy transpose of B into buf[0] */
2374 bn = B->rmap->n;
2375 sendbuf = abt->buf[0];
2376 recvbuf = abt->buf[1];
2377 for (k = 0, j = 0; j < bn; j++) {
2378 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2379 }
2380 PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2381 PetscCall(PetscMPIIntCast(bn * cK, &ibn));
2382 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2383 PetscCall(PetscBLASIntCast(cK, &ck));
2384 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2385 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2386 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2387 PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2388 PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2389 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2390 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2391 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2392 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2393 PetscFunctionReturn(PETSC_SUCCESS);
2394 }
2395
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)2396 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2397 {
2398 MatProductCtx_MatTransMultDense *abt;
2399
2400 PetscFunctionBegin;
2401 MatCheckProduct(C, 3);
2402 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2403 abt = (MatProductCtx_MatTransMultDense *)C->product->data;
2404 switch (abt->alg) {
2405 case 1:
2406 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2407 break;
2408 default:
2409 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2410 break;
2411 }
2412 PetscFunctionReturn(PETSC_SUCCESS);
2413 }
2414
MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense(PetscCtxRt data)2415 static PetscErrorCode MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense(PetscCtxRt data)
2416 {
2417 MatProductCtx_MatMultDense *ab = *(MatProductCtx_MatMultDense **)data;
2418
2419 PetscFunctionBegin;
2420 PetscCall(MatDestroy(&ab->Ce));
2421 PetscCall(MatDestroy(&ab->Ae));
2422 PetscCall(MatDestroy(&ab->Be));
2423 PetscCall(PetscFree(ab));
2424 PetscFunctionReturn(PETSC_SUCCESS);
2425 }
2426
MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)2427 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2428 {
2429 MatProductCtx_MatMultDense *ab;
2430 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
2431 Mat_MPIDense *b = (Mat_MPIDense *)B->data;
2432
2433 PetscFunctionBegin;
2434 MatCheckProduct(C, 3);
2435 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2436 ab = (MatProductCtx_MatMultDense *)C->product->data;
2437 if (ab->Ae && ab->Ce) {
2438 #if PetscDefined(HAVE_ELEMENTAL)
2439 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2440 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2441 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2442 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2443 #else
2444 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2445 #endif
2446 } else {
2447 MPI_Comm comm;
2448 const PetscScalar *read;
2449 PetscScalar *write;
2450 PetscInt lda;
2451 const PetscInt *ranges;
2452 PetscMPIInt size;
2453
2454 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
2455 comm = PetscObjectComm((PetscObject)B);
2456 PetscCallMPI(MPI_Comm_size(comm, &size));
2457 PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
2458 if (ranges[1] == ranges[size]) {
2459 // optimize for the case where the B matrix is broadcast from rank 0
2460 PetscInt b_lda, be_lda;
2461 Mat b_local = b->A;
2462 Mat b_alloc = NULL;
2463 Mat be_local = ab->Be;
2464 Mat be_alloc = NULL;
2465 PetscMemType b_memtype, be_memtype;
2466 const PetscScalar *b_array = NULL;
2467 MPI_Datatype vector_type;
2468 PetscScalar *be_array = NULL;
2469 PetscMPIInt rank;
2470
2471 PetscCallMPI(MPI_Comm_rank(comm, &rank));
2472 PetscCall(MatDenseGetLDA(be_local, &be_lda));
2473 if (be_lda != B->rmap->N) {
2474 PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
2475 be_local = be_alloc;
2476 }
2477
2478 if (rank == 0) {
2479 PetscCall(MatDenseGetLDA(b_local, &b_lda));
2480 if (b_lda != B->rmap->N) {
2481 PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
2482 PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
2483 b_local = b_alloc;
2484 }
2485 }
2486 vector_type = MPIU_SCALAR;
2487 if (B->cmap->N > 1) {
2488 PetscMPIInt mpi_N;
2489
2490 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
2491 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
2492 PetscCallMPI(MPI_Type_commit(&vector_type));
2493 }
2494 PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
2495 PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
2496 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
2497 PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
2498 PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
2499 PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
2500 if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
2501 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
2502 PetscCall(MatDestroy(&be_alloc));
2503 PetscCall(MatDestroy(&b_alloc));
2504 } else {
2505 PetscCall(MatDenseGetLDA(B, &lda));
2506 PetscCall(MatDenseGetArrayRead(B, &read));
2507 PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
2508 for (PetscInt i = 0; i < C->cmap->N; ++i) {
2509 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2510 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
2511 }
2512 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
2513 PetscCall(MatDenseRestoreArrayRead(B, &read));
2514 }
2515 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
2516 }
2517 PetscFunctionReturn(PETSC_SUCCESS);
2518 }
2519
MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)2520 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2521 {
2522 Mat_Product *product = C->product;
2523 PetscInt alg;
2524 MatProductCtx_MatMultDense *ab;
2525 PetscBool flg;
2526
2527 PetscFunctionBegin;
2528 MatCheckProduct(C, 4);
2529 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2530 /* check local size of A and B */
2531 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
2532 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2533
2534 PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
2535 alg = flg ? 0 : 1;
2536
2537 /* setup C */
2538 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
2539 PetscCall(MatSetType(C, MATMPIDENSE));
2540 PetscCall(MatSetUp(C));
2541
2542 /* create data structure for reuse Cdense */
2543 PetscCall(PetscNew(&ab));
2544
2545 switch (alg) {
2546 case 1: /* alg: "elemental" */
2547 #if PetscDefined(HAVE_ELEMENTAL)
2548 /* create elemental matrices Ae and Be */
2549 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
2550 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2551 PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
2552 PetscCall(MatSetUp(ab->Ae));
2553 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2554
2555 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
2556 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2557 PetscCall(MatSetType(ab->Be, MATELEMENTAL));
2558 PetscCall(MatSetUp(ab->Be));
2559 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2560
2561 /* compute symbolic Ce = Ae*Be */
2562 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
2563 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
2564 #else
2565 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
2566 #endif
2567 break;
2568 default: /* alg: "petsc" */
2569 ab->Ae = NULL;
2570 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
2571 ab->Ce = NULL;
2572 break;
2573 }
2574
2575 C->product->data = ab;
2576 C->product->destroy = MatProductCtxDestroy_MatMatMult_MPIDense_MPIDense;
2577 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2578 PetscFunctionReturn(PETSC_SUCCESS);
2579 }
2580
MatProductSetFromOptions_MPIDense_AB(Mat C)2581 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2582 {
2583 Mat_Product *product = C->product;
2584 const char *algTypes[2] = {"petsc", "elemental"};
2585 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1;
2586 PetscBool flg = PETSC_FALSE;
2587
2588 PetscFunctionBegin;
2589 /* Set default algorithm */
2590 alg = 0; /* default is PETSc */
2591 PetscCall(PetscStrcmp(product->alg, "default", &flg));
2592 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2593
2594 /* Get runtime option */
2595 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2596 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
2597 PetscOptionsEnd();
2598 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2599
2600 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2601 C->ops->productsymbolic = MatProductSymbolic_AB;
2602 PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604
MatProductSetFromOptions_MPIDense_AtB(Mat C)2605 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2606 {
2607 Mat_Product *product = C->product;
2608 Mat A = product->A, B = product->B;
2609
2610 PetscFunctionBegin;
2611 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
2612 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2613 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2614 C->ops->productsymbolic = MatProductSymbolic_AtB;
2615 PetscFunctionReturn(PETSC_SUCCESS);
2616 }
2617
MatProductSetFromOptions_MPIDense_ABt(Mat C)2618 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2619 {
2620 Mat_Product *product = C->product;
2621 const char *algTypes[2] = {"allgatherv", "cyclic"};
2622 PetscInt alg, nalg = 2;
2623 PetscBool flg = PETSC_FALSE;
2624
2625 PetscFunctionBegin;
2626 /* Set default algorithm */
2627 alg = 0; /* default is allgatherv */
2628 PetscCall(PetscStrcmp(product->alg, "default", &flg));
2629 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2630
2631 /* Get runtime option */
2632 if (product->api_user) {
2633 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2634 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2635 PetscOptionsEnd();
2636 } else {
2637 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2638 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2639 PetscOptionsEnd();
2640 }
2641 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2642
2643 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2644 C->ops->productsymbolic = MatProductSymbolic_ABt;
2645 PetscFunctionReturn(PETSC_SUCCESS);
2646 }
2647
MatProductSetFromOptions_MPIDense(Mat C)2648 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2649 {
2650 Mat_Product *product = C->product;
2651
2652 PetscFunctionBegin;
2653 switch (product->type) {
2654 case MATPRODUCT_AB:
2655 PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2656 break;
2657 case MATPRODUCT_AtB:
2658 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2659 break;
2660 case MATPRODUCT_ABt:
2661 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2662 break;
2663 default:
2664 break;
2665 }
2666 PetscFunctionReturn(PETSC_SUCCESS);
2667 }
2668
MatDenseScatter_Private(PetscSF sf,Mat X,Mat Y,InsertMode mode,ScatterMode smode)2669 PetscErrorCode MatDenseScatter_Private(PetscSF sf, Mat X, Mat Y, InsertMode mode, ScatterMode smode)
2670 {
2671 const PetscScalar *in;
2672 PetscScalar *out;
2673 PetscSF vsf;
2674 PetscInt N, ny, rld, lld;
2675 PetscMemType mtype[2];
2676 MPI_Op op = MPI_OP_NULL;
2677
2678 PetscFunctionBegin;
2679 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2680 PetscValidHeaderSpecific(X, MAT_CLASSID, 2);
2681 PetscValidHeaderSpecific(Y, MAT_CLASSID, 3);
2682 if (mode == INSERT_VALUES) op = MPI_REPLACE;
2683 else if (mode == ADD_VALUES) op = MPIU_SUM;
2684 else if (mode == MAX_VALUES) op = MPIU_MAX;
2685 else if (mode == MIN_VALUES) op = MPIU_MIN;
2686 PetscCheck(op != MPI_OP_NULL, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in MatDenseScatter_Private()", mode);
2687 PetscCheck(smode == SCATTER_FORWARD || smode == SCATTER_REVERSE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported ScatterMode %d in MatDenseScatter_Private()", smode);
2688 PetscCall(MatGetSize(X, NULL, &N));
2689 PetscCall(MatGetSize(Y, NULL, &ny));
2690 PetscCheck(N == ny, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_SIZ, "Matrix column sizes must match: %" PetscInt_FMT " != %" PetscInt_FMT, N, ny);
2691 PetscCall(MatDenseGetLDA(X, &rld));
2692 PetscCall(MatDenseGetLDA(Y, &lld));
2693 /* get cached or create new strided PetscSF when the number of columns is greater than one */
2694 if (N > 1) {
2695 PetscCall(PetscObjectQuery((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject *)&vsf));
2696 if (vsf) {
2697 PetscInt nr[2], nl[2];
2698
2699 PetscCall(PetscSFGetGraph(sf, nr, nl, NULL, NULL));
2700 PetscCall(PetscSFGetGraph(vsf, nr + 1, nl + 1, NULL, NULL));
2701 if (N * nr[0] != nr[1] || N * nl[0] != nl[1]) vsf = NULL;
2702 }
2703 if (!vsf) {
2704 PetscCall(PetscSFCreateStridedSF(sf, N, rld, lld, &vsf));
2705 PetscCall(PetscObjectCompose((PetscObject)sf, "_MatDenseScatter_StridedSF", (PetscObject)vsf));
2706 PetscCall(PetscObjectDereference((PetscObject)vsf));
2707 }
2708 } else vsf = sf;
2709 /* the output array is accessed in read and write mode,
2710 but write-only in the INSERT_VALUES case could be worth exploring */
2711 PetscCall(MatDenseGetArrayReadAndMemType(X, &in, &mtype[0]));
2712 PetscCall(MatDenseGetArrayAndMemType(Y, &out, &mtype[1]));
2713 if (smode == SCATTER_FORWARD) {
2714 PetscCall(PetscSFBcastWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
2715 PetscCall(PetscSFBcastEnd(vsf, vsf->vscat.unit, in, out, op));
2716 } else {
2717 PetscCall(PetscSFReduceWithMemTypeBegin(vsf, vsf->vscat.unit, mtype[0], in, mtype[1], out, op));
2718 PetscCall(PetscSFReduceEnd(vsf, vsf->vscat.unit, in, out, op));
2719 }
2720 PetscCall(MatDenseRestoreArrayAndMemType(Y, &out));
2721 PetscCall(MatDenseRestoreArrayReadAndMemType(X, &in));
2722 PetscFunctionReturn(PETSC_SUCCESS);
2723 }
2724