1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9
/*
   For a ghost index i that may be negative or greater than the upper bound,
   map it into the range 0:m-1 using periodicity.

   Arguments are fully parenthesized so arbitrary expressions may be passed
   safely; note that both i and m are evaluated more than once, so arguments
   must not have side effects.
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
15
DMDASetBlockFills_Private(const PetscInt * dfill,PetscInt w,PetscInt ** rfill)16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18 PetscInt i, j, nz, *fill;
19
20 PetscFunctionBegin;
21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22
23 /* count number nonzeros */
24 nz = 0;
25 for (i = 0; i < w; i++) {
26 for (j = 0; j < w; j++) {
27 if (dfill[w * i + j]) nz++;
28 }
29 }
30 PetscCall(PetscMalloc1(nz + w + 1, &fill));
31 /* construct modified CSR storage of nonzero structure */
32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
33 so fill[1] - fill[0] gives number of nonzeros in first row etc */
34 nz = w + 1;
35 for (i = 0; i < w; i++) {
36 fill[i] = nz;
37 for (j = 0; j < w; j++) {
38 if (dfill[w * i + j]) {
39 fill[nz] = j;
40 nz++;
41 }
42 }
43 }
44 fill[w] = nz;
45
46 *rfill = fill;
47 PetscFunctionReturn(PETSC_SUCCESS);
48 }
49
DMDASetBlockFillsSparse_Private(const PetscInt * dfillsparse,PetscInt w,PetscInt ** rfill)50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52 PetscInt nz;
53
54 PetscFunctionBegin;
55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56
57 /* Determine number of non-zeros */
58 nz = (dfillsparse[w] - w - 1);
59
60 /* Allocate space for our copy of the given sparse matrix representation. */
61 PetscCall(PetscMalloc1(nz + w + 1, rfill));
62 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63 PetscFunctionReturn(PETSC_SUCCESS);
64 }
65
DMDASetBlockFills_Private2(DM_DA * dd)66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68 PetscInt i, k, cnt = 1;
69
70 PetscFunctionBegin;
71 /* ofillcount tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
72 columns to the left with any nonzeros in them plus 1 */
73 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
74 for (i = 0; i < dd->w; i++) {
75 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
76 }
77 for (i = 0; i < dd->w; i++) {
78 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
79 }
80 PetscFunctionReturn(PETSC_SUCCESS);
81 }
82
83 /*@
84 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
85 of the matrix returned by `DMCreateMatrix()`.
86
87 Logically Collective
88
89 Input Parameters:
90 + da - the `DMDA`
91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
92 - ofill - the fill pattern in the off-diagonal blocks
93
94 Level: developer
95
96 Notes:
97 This only makes sense when you are doing multicomponent problems but using the
98 `MATMPIAIJ` matrix format
99
100 The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101 representing coupling and 0 entries for missing coupling. For example
102 .vb
103 dfill[9] = {1, 0, 0,
104 1, 1, 0,
105 0, 1, 1}
106 .ve
107 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109 diagonal block).
110
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format
113
114 Contributed by\:
115 Glenn Hammond
116
117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118 @*/
DMDASetBlockFills(DM da,const PetscInt * dfill,const PetscInt * ofill)119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120 {
121 DM_DA *dd = (DM_DA *)da->data;
122
123 PetscFunctionBegin;
124 /* save the given dfill and ofill information */
125 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
127
128 /* count nonzeros in ofill columns */
129 PetscCall(DMDASetBlockFills_Private2(dd));
130 PetscFunctionReturn(PETSC_SUCCESS);
131 }
132
133 /*@
134 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135 of the matrix returned by `DMCreateMatrix()`, using sparse representations
136 of fill patterns.
137
138 Logically Collective
139
140 Input Parameters:
141 + da - the `DMDA`
142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
144
145 Level: developer
146
147 Notes:
148 This only makes sense when you are doing multicomponent problems but using the
149 `MATMPIAIJ` matrix format
150
151 The format for `dfill` and `ofill` is a sparse representation of a
152 dof-by-dof matrix with 1 entries representing coupling and 0 entries
153 for missing coupling. The sparse representation is a 1 dimensional
154 array of length nz + dof + 1, where nz is the number of non-zeros in
155 the matrix. The first dof entries in the array give the
156 starting array indices of each row's items in the rest of the array,
157 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158 and the remaining nz items give the column indices of each of
159 the 1s within the logical 2D matrix. Each row's items within
160 the array are the column indices of the 1s within that row
161 of the 2D matrix. PETSc developers may recognize that this is the
162 same format as that computed by the `DMDASetBlockFills_Private()`
163 function from a dense 2D matrix representation.
164
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format
167
168 Contributed by\:
169 Philip C. Roth
170
171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
DMDASetBlockFillsSparse(DM da,const PetscInt * dfillsparse,const PetscInt * ofillsparse)173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174 {
175 DM_DA *dd = (DM_DA *)da->data;
176
177 PetscFunctionBegin;
178 /* save the given dfill and ofill information */
179 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
181
182 /* count nonzeros in ofill columns */
183 PetscCall(DMDASetBlockFills_Private2(dd));
184 PetscFunctionReturn(PETSC_SUCCESS);
185 }
186
DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring * coloring)187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188 {
189 PetscInt dim, m, n, p, nc;
190 DMBoundaryType bx, by, bz;
191 MPI_Comm comm;
192 PetscMPIInt size;
193 PetscBool isBAIJ;
194 DM_DA *dd = (DM_DA *)da->data;
195
196 PetscFunctionBegin;
197 /*
198 m
199 ------------------------------------------------------
200 | |
201 | |
202 | ---------------------- |
203 | | | |
204 n | yn | | |
205 | | | |
206 | .--------------------- |
207 | (xs,ys) xn |
208 | . |
209 | (gxs,gys) |
210 | |
211 -----------------------------------------------------
212 */
213
214 /*
215 nc - number of components per grid point
216 col - number of colors needed in one direction for single component problem
217
218 */
219 PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
220
221 PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222 PetscCallMPI(MPI_Comm_size(comm, &size));
223 if (ctype == IS_COLORING_LOCAL) {
224 PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both sides of the domain on the same process");
225 }
226
227 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
228 matrices is for the blocks, not the individual matrix elements */
229 PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
230 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
231 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
232 if (isBAIJ) {
233 dd->w = 1;
234 dd->xs = dd->xs / nc;
235 dd->xe = dd->xe / nc;
236 dd->Xs = dd->Xs / nc;
237 dd->Xe = dd->Xe / nc;
238 }
239
240 /*
241 We do not provide a getcoloring function in the DMDA operations because
242 the basic DMDA does not know about matrices. We think of DMDA as being
243 more low-level then matrices.
244 */
245 if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
246 else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
247 else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
248 else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
249 if (isBAIJ) {
250 dd->w = nc;
251 dd->xs = dd->xs * nc;
252 dd->xe = dd->xe * nc;
253 dd->Xs = dd->Xs * nc;
254 dd->Xe = dd->Xe * nc;
255 }
256 PetscFunctionReturn(PETSC_SUCCESS);
257 }
258
DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring * coloring)259 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
260 {
261 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
262 PetscInt ncolors = 0;
263 MPI_Comm comm;
264 DMBoundaryType bx, by;
265 DMDAStencilType st;
266 ISColoringValue *colors;
267 DM_DA *dd = (DM_DA *)da->data;
268
269 PetscFunctionBegin;
270 /*
271 nc - number of components per grid point
272 col - number of colors needed in one direction for single component problem
273
274 */
275 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
276 col = 2 * s + 1;
277 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
278 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
279 PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
280
281 /* special case as taught to us by Paul Hovland */
282 if (st == DMDA_STENCIL_STAR && s == 1) {
283 PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
284 } else {
285 if (ctype == IS_COLORING_GLOBAL) {
286 if (!dd->localcoloring) {
287 PetscCall(PetscMalloc1(nc * nx * ny, &colors));
288 ii = 0;
289 for (j = ys; j < ys + ny; j++) {
290 for (i = xs; i < xs + nx; i++) {
291 for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((i % col) + col * (j % col)), colors + ii++));
292 }
293 }
294 ncolors = nc + nc * (col - 1 + col * (col - 1));
295 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
296 }
297 *coloring = dd->localcoloring;
298 } else if (ctype == IS_COLORING_LOCAL) {
299 if (!dd->ghostedcoloring) {
300 PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
301 ii = 0;
302 for (j = gys; j < gys + gny; j++) {
303 for (i = gxs; i < gxs + gnx; i++) {
304 for (k = 0; k < nc; k++) {
305 /* the complicated stuff is to handle periodic boundaries */
306 PetscCall(ISColoringValueCast(k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)), colors + ii++));
307 }
308 }
309 }
310 ncolors = nc + nc * (col - 1 + col * (col - 1));
311 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
312 /* PetscIntView(ncolors,(PetscInt*)colors,0); */
313
314 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
315 }
316 *coloring = dd->ghostedcoloring;
317 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318 }
319 PetscCall(ISColoringReference(*coloring));
320 PetscFunctionReturn(PETSC_SUCCESS);
321 }
322
/*
   Creates a coloring for a 3d DMDA assuming an AIJ matrix:
   color(i,j,k,c) = c + nc*((i mod col) + col*(j mod col) + col^2*(k mod col))
   with col = 2*s + 1. Colorings are cached on the DMDA and reused.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* periodic wrap reuses colors modulo col, so each periodic direction must tile evenly */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      /* color depends only on the grid position modulo col in each direction plus the component */
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * ((i % col) + col * (j % col) + col * col * (k % col)), colors + ii++));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries: SetInRange()
                 maps out-of-range ghost indices back into the owned index range */
              PetscCall(ISColoringValueCast(l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)), colors + ii++));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the coloring stays cached on the DMDA, so hand the caller an extra reference */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
387
/*
   Creates a coloring for a 1d DMDA assuming an AIJ matrix. When a block
   off-diagonal fill pattern was supplied (dd->ofillcols), the global coloring
   exploits it: components with no off-block coupling keep one color per
   component; coupled components get extra colors keyed to (i mod col).
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  /* periodic wrap reuses colors modulo col, so the grid must tile evenly */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col);
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components that couple into off-diagonal blocks */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from the col-aligned positions: give it a
                 distinct color per ((i mod col) - 1, coupled-component-rank) pair,
                 placed after the first nc colors */
              PetscCall(ISColoringValueCast(nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l], colors + i1++));
            } else {
              /* uncoupled component (or i aligned on col): component index suffices */
              PetscCall(ISColoringValueCast(l, colors + i1++));
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        /* dense block coupling: standard scheme, one color per (component, i mod col) */
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * (i % col), colors + i1++));
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries: SetInRange()
             maps out-of-range ghost indices back into 0:m-1 */
          PetscCall(ISColoringValueCast(l + nc * (SetInRange(i, m) % col), colors + i1++));
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the coloring stays cached on the DMDA, so hand the caller an extra reference */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
456
/*
   Special 5-color scheme for the 2d 5-point (star, width-1) stencil:
   color(i,j,c) = c + nc*((3*j + i) mod 5), giving 5*nc colors total instead
   of the 9*nc a box-stencil coloring would need.

   NOTE(review): unlike the other coloring routines this one does NOT call
   ISColoringReference(); its caller DMCreateColoring_DA_2d_MPIAIJ() takes the
   reference after the special-case branch returns.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          /* (3*j + i) mod 5 separates each point from its four stencil neighbors */
          for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * j + i) % 5), colors + ii++));
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange() maps out-of-range ghost indices back periodically before coloring */
          for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5), colors + ii++));
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}
506
507 /* =========================================================================== */
508 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
509 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
510 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
511 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
512 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
513 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
522
/*
   Views a DMDA-generated parallel matrix in the natural (application) grid
   ordering rather than the PETSc parallel ordering: it permutes the matrix
   via the DMDA's AO, then views the permuted copy.
*/
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* list this process's rows in application order, then map them to PETSc ordering;
     the resulting IS selects the rows/columns of the natural-ordering submatrix */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  /* MPISELL is converted to AIJ first — presumably because the submatrix
     extraction below is not supported for SELL; confirm against MatCreateSubMatrix() */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  /* make the permuted copy print under the original matrix's prefix and name,
     temporarily suppressing the object header banner */
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
569
/*
   Loads a matrix that was stored in natural (application) grid ordering and
   permutes it into the DMDA's PETSc parallel ordering, replacing A's contents
   in place via MatHeaderReplace().
*/
static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering, with the same type and layout as A */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header: A takes over Aapp's data without
     invalidating existing references to A */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
607
/*
   Creates (and preallocates, when possible) a matrix suitable for the DMDA's
   grid, dof count, and stencil. The concrete preallocation routine is chosen
   by probing which preallocation interface the matrix type implements.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  PetscBool   aij = PETSC_FALSE, baij = PETSC_FALSE, sbaij = PETSC_FALSE, sell = PETSC_FALSE, is = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                     m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  /* local size is the owned patch times dof; global size the whole grid times dof */
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  /* keep small matrices (below the bind_below threshold) on the CPU */
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices. This is somewhat of a cheat, because sometimes
   we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
   specialized setting routines depend only on the particular preallocation
   details of the matrix, not the type itself.
  */
  if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
    /* probe which preallocation interface the concrete type supports;
       checked in order: AIJ, BAIJ, SBAIJ, SELL, IS */
    PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
    if (!aij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
    if (!aij) {
      PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
      if (!baij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
      if (!baij) {
        PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) {
          PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
          if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
        }
        if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        /* a block fill pattern was set with DMDASetBlockFills(); use it for preallocation */
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size; /* NOTE(review): shadows the function-level 'size' declared above */
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  /* attach the ghosted-region stencil info so MatSetValuesStencil() can be used */
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da)); /* NOTE(review): MatSetDM() was already called above; harmless but redundant */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (PetscErrorCodeFn *)MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (PetscErrorCodeFn *)MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}
758
DMCreateMatrix_DA_IS(DM dm,Mat J)759 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
760 {
761 DM_DA *da = (DM_DA *)dm->data;
762 Mat lJ, P;
763 ISLocalToGlobalMapping ltog;
764 IS is;
765 PetscBT bt;
766 const PetscInt *e_loc, *idx;
767 PetscInt i, nel, nen, nv, dof, *gidx, n, N;
768
769 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
770 We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
771 PetscFunctionBegin;
772 dof = da->w;
773 PetscCall(MatSetBlockSize(J, dof));
774 PetscCall(DMGetLocalToGlobalMapping(dm, <og));
775
776 /* flag local elements indices in local DMDA numbering */
777 PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
778 PetscCall(PetscBTCreate(nv / dof, &bt));
779 PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
780 for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
781
782 /* filter out (set to -1) the global indices not used by the local elements */
783 PetscCall(PetscMalloc1(nv / dof, &gidx));
784 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
785 PetscCall(PetscArraycpy(gidx, idx, nv / dof));
786 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
787 for (i = 0; i < nv / dof; i++)
788 if (!PetscBTLookup(bt, i)) gidx[i] = -1;
789 PetscCall(PetscBTDestroy(&bt));
790 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
791 PetscCall(ISLocalToGlobalMappingCreateIS(is, <og));
792 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
793 PetscCall(ISLocalToGlobalMappingDestroy(<og));
794 PetscCall(ISDestroy(&is));
795
796 /* Preallocation */
797 if (dm->prealloc_skip) {
798 PetscCall(MatSetUp(J));
799 } else {
800 PetscCall(MatISGetLocalMat(J, &lJ));
801 PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL));
802 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
803 PetscCall(MatSetType(P, MATPREALLOCATOR));
804 PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
805 PetscCall(MatGetSize(lJ, &N, NULL));
806 PetscCall(MatGetLocalSize(lJ, &n, NULL));
807 PetscCall(MatSetSizes(P, n, n, N, N));
808 PetscCall(MatSetBlockSize(P, dof));
809 PetscCall(MatSetUp(P));
810 for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
811 PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
812 PetscCall(MatISRestoreLocalMat(J, &lJ));
813 PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
814 PetscCall(MatDestroy(&P));
815
816 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
817 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
818 }
819 PetscFunctionReturn(PETSC_SUCCESS);
820 }
821
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of one grid point; cols holds at most one full stencil worth of column indices */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full [-s,s] when periodic, otherwise clipped at the physical boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) linear index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      /* stencil extent in y, clipped the same way */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      /* count diagonal/off-diagonal nonzeros of the nc rows of this grid point */
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is locked in before MAT_NEW_NONZERO_LOCATION_ERR is set */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
922
DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)923 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
924 {
925 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
926 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
927 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
928 MPI_Comm comm;
929 PetscScalar *values;
930 DMBoundaryType bx, by, bz;
931 ISLocalToGlobalMapping ltog;
932 DMDAStencilType st;
933
934 PetscFunctionBegin;
935 /*
936 nc - number of components per grid point
937 col - number of colors needed in one direction for single component problem
938 */
939 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
940 col = 2 * s + 1;
941 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
942 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
943 PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
944
945 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
946 PetscCall(DMGetLocalToGlobalMapping(da, <og));
947
948 PetscCall(MatSetBlockSize(J, nc));
949 /* determine the matrix preallocation information */
950 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
951 for (i = xs; i < xs + nx; i++) {
952 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
953 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
954 for (j = ys; j < ys + ny; j++) {
955 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
956 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
957 for (k = zs; k < zs + nz; k++) {
958 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
959 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
960
961 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
962
963 cnt = 0;
964 for (l = 0; l < nc; l++) {
965 for (ii = istart; ii < iend + 1; ii++) {
966 for (jj = jstart; jj < jend + 1; jj++) {
967 for (kk = kstart; kk < kend + 1; kk++) {
968 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
969 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
970 }
971 }
972 }
973 }
974 rows[l] = l + nc * (slot);
975 }
976 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
977 }
978 }
979 }
980 PetscCall(MatSetBlockSize(J, nc));
981 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
982 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
983 MatPreallocateEnd(dnz, onz);
984 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
985
986 /*
987 For each node in the grid: we get the neighbors in the local (on processor ordering
988 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
989 PETSc ordering.
990 */
991 if (!da->prealloc_only) {
992 PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
993 for (i = xs; i < xs + nx; i++) {
994 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
995 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
996 for (j = ys; j < ys + ny; j++) {
997 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
998 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
999 for (k = zs; k < zs + nz; k++) {
1000 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1001 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1002
1003 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1004
1005 cnt = 0;
1006 for (l = 0; l < nc; l++) {
1007 for (ii = istart; ii < iend + 1; ii++) {
1008 for (jj = jstart; jj < jend + 1; jj++) {
1009 for (kk = kstart; kk < kend + 1; kk++) {
1010 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1011 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1012 }
1013 }
1014 }
1015 }
1016 rows[l] = l + nc * (slot);
1017 }
1018 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1019 }
1020 }
1021 }
1022 PetscCall(PetscFree(values));
1023 /* do not copy values to GPU since they are all zero and not yet needed there */
1024 PetscCall(MatBindToCPU(J, PETSC_TRUE));
1025 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1026 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1027 PetscCall(MatBindToCPU(J, PETSC_FALSE));
1028 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1029 }
1030 PetscCall(PetscFree2(rows, cols));
1031 PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* non-periodic grid: columns below are generated in sorted order, which MatSetValuesLocal() can exploit */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of one grid point; cols holds at most one full stencil worth of column indices */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full [-s,s] when periodic, otherwise clipped at the physical boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) linear index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* respect a mapping the caller may already have installed on J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* here the stencil loops are outermost so columns come out in increasing order,
           with the nc consecutive component columns expanded from the first one */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        /* NULL values: insert explicit zeros just to lock in the nonzero pattern */
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    /* only unbind if we were the ones who bound it */
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1144
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  /* dfill/ofill: modified-CSR fill patterns (set via DMDASetBlockFills) describing which
     of the nc x nc component couplings are nonzero on the diagonal/off-diagonal stencil points */
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one row at a time here, so cols needs at most nc entries per stencil point */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full [-s,s] when periodic, otherwise clipped at the physical boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) linear index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* neighbor point: couple only the components listed in ofill for row-component k */
              if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* the grid point itself: use dfill if given, otherwise a dense diagonal block */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* identical traversal as the counting pass above, now inserting explicit zeros to fix the pattern */
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1268
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  /* non-periodic grid: the assembly loop below emits sorted columns, which MatSetValuesLocal() can exploit */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of one grid point; cols holds at most one full stencil worth of column indices */
  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in each direction: full [-s,s] when periodic, clipped at the physical boundary otherwise */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) linear index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* respect a mapping the caller may already have installed on J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* unlike the counting pass, loop kk/jj/ii outermost so local column indices come out
             in increasing order, with the nc component columns expanded from the first one */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          /* NULL values: insert explicit zeros just to lock in the nonzero pattern */
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1393
DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)1394 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1395 {
1396 DM_DA *dd = (DM_DA *)da->data;
1397 PetscInt xs, nx, i, j, gxs, gnx, row, k, l;
1398 PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1399 PetscInt *ofill = dd->ofill, *dfill = dd->dfill;
1400 DMBoundaryType bx;
1401 ISLocalToGlobalMapping ltog;
1402 PetscMPIInt rank, size;
1403
1404 PetscFunctionBegin;
1405 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1406 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1407
1408 /*
1409 nc - number of components per grid point
1410 */
1411 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1412 PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1413 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1414 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1415
1416 PetscCall(MatSetBlockSize(J, nc));
1417 PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1418
1419 /*
1420 note should be smaller for first and last process with no periodic
1421 does not handle dfill
1422 */
1423 cnt = 0;
1424 /* coupling with process to the left */
1425 for (i = 0; i < s; i++) {
1426 for (j = 0; j < nc; j++) {
1427 ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1428 cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1429 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1430 if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1431 else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1432 }
1433 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1434 cnt++;
1435 }
1436 }
1437 for (i = s; i < nx - s; i++) {
1438 for (j = 0; j < nc; j++) {
1439 cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1440 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1441 cnt++;
1442 }
1443 }
1444 /* coupling with process to the right */
1445 for (i = nx - s; i < nx; i++) {
1446 for (j = 0; j < nc; j++) {
1447 ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1448 cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1449 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1450 if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1451 else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1452 }
1453 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1454 cnt++;
1455 }
1456 }
1457
1458 PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1459 PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1460 PetscCall(PetscFree2(cols, ocols));
1461
1462 PetscCall(DMGetLocalToGlobalMapping(da, <og));
1463 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1464
1465 /*
1466 For each node in the grid: we get the neighbors in the local (on processor ordering
1467 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1468 PETSc ordering.
1469 */
1470 if (!da->prealloc_only) {
1471 PetscCall(PetscMalloc1(maxcnt, &cols));
1472 row = xs * nc;
1473 /* coupling with process to the left */
1474 for (i = xs; i < xs + s; i++) {
1475 for (j = 0; j < nc; j++) {
1476 cnt = 0;
1477 if (rank) {
1478 for (l = 0; l < s; l++) {
1479 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1480 }
1481 }
1482 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1483 for (l = 0; l < s; l++) {
1484 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1485 }
1486 }
1487 if (dfill) {
1488 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1489 } else {
1490 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1491 }
1492 for (l = 0; l < s; l++) {
1493 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1494 }
1495 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1496 row++;
1497 }
1498 }
1499 for (i = xs + s; i < xs + nx - s; i++) {
1500 for (j = 0; j < nc; j++) {
1501 cnt = 0;
1502 for (l = 0; l < s; l++) {
1503 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1504 }
1505 if (dfill) {
1506 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1507 } else {
1508 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1509 }
1510 for (l = 0; l < s; l++) {
1511 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1512 }
1513 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1514 row++;
1515 }
1516 }
1517 /* coupling with process to the right */
1518 for (i = xs + nx - s; i < xs + nx; i++) {
1519 for (j = 0; j < nc; j++) {
1520 cnt = 0;
1521 for (l = 0; l < s; l++) {
1522 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1523 }
1524 if (dfill) {
1525 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1526 } else {
1527 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1528 }
1529 if (rank < size - 1) {
1530 for (l = 0; l < s; l++) {
1531 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1532 }
1533 }
1534 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1535 for (l = 0; l < s; l++) {
1536 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1537 }
1538 }
1539 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1540 row++;
1541 }
1542 }
1543 PetscCall(PetscFree(cols));
1544 /* do not copy values to GPU since they are all zero and not yet needed there */
1545 PetscCall(MatBindToCPU(J, PETSC_TRUE));
1546 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1547 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1548 PetscCall(MatBindToCPU(J, PETSC_FALSE));
1549 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1550 }
1551 PetscFunctionReturn(PETSC_SUCCESS);
1552 }
1553
/*
  Preallocates, and fills with zeros, the Jacobian matrix J for a 1d DMDA in (MPI)AIJ
  format, assuming dense coupling between all nc components of neighboring grid points
  within the stencil width (compare the _Fill variant, which uses dfill/ofill masks).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  /* without periodicity the column indices generated below come out sorted, which MAT_SORTED_FULL exploits for faster insertion */
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1; /* full stencil width in grid points */

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* worst case per row: all nc components of all col stencil neighbors */
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);          /* clip the stencil at the lower ghost boundary */
      iend   = PetscMin(s, gxs + gnx - i - 1); /* clip the stencil at the upper ghost boundary */
      slot   = i - gxs;                        /* local (ghosted) index of this grid point */

      cnt = 0;
      /* column indices: consecutive components of each neighbor within [istart, iend] */
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      /* all nc rows of this grid point share the same column pattern; values == NULL inserts structure only */
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1617
/*
  Fills with zeros a 1d DMDA Jacobian in SeqAIJ format using
  MatSeqAIJSetTotalPreallocation(), which reserves one contiguous slab of space for the
  whole matrix instead of per-row nonzero counts.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1; /* full stencil width in grid points */

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* total worst-case space: nx*nc rows, each with up to col*nc nonzeros */
  PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);          /* clip the stencil at the lower ghost boundary */
      iend   = PetscMin(s, gxs + gnx - i - 1); /* clip the stencil at the upper ghost boundary */
      slot   = i - gxs;                        /* local (ghosted) index of this grid point */

      cnt = 0;
      /* column indices: consecutive components of each neighbor within [istart, iend] */
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      /* all nc rows of this grid point share the same column pattern; values == NULL inserts structure only */
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  /* presumably MAT_SORTED_FULL was enabled by MatSeqAIJSetTotalPreallocation(); turn it off unconditionally here — TODO confirm */
  PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1680
/*
  Preallocates, and fills with zeros, the Jacobian matrix J for a 2d DMDA in (MPI)BAIJ
  block format; each grid point is one nc x nc block, so all counting and insertion is
  done per grid point (per block), not per component.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* full stencil width in grid points */

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundary keeps the full stencil width; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per possible stencil entry */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1772
/*
  Preallocates, and fills with zeros, the Jacobian matrix J for a 3d DMDA in (MPI)BAIJ
  block format; each grid point is one nc x nc block, so all counting and insertion is
  done per grid point (per block), not per component.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1; /* full stencil width in grid points */

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* block columns only, so at most col^3 entries per block row */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundary keeps the full stencil width; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of this grid point */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per possible stencil entry */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1879
1880 /*
   This helper is for SBAIJ preallocation; it discards the lower-triangular entries, which are difficult to
   identify in the local ordering with a periodic domain.
1883 */
L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt * row,PetscInt * cnt,PetscInt col[])1884 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1885 {
1886 PetscInt i, n;
1887
1888 PetscFunctionBegin;
1889 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1890 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1891 for (i = 0, n = 0; i < *cnt; i++) {
1892 if (col[i] >= *row) col[n++] = col[i];
1893 }
1894 *cnt = n;
1895 PetscFunctionReturn(PETSC_SUCCESS);
1896 }
1897
/*
  Preallocates, and fills with zeros, the Jacobian matrix J for a 2d DMDA in (MPI)SBAIJ
  symmetric block format.  Only the upper triangle is stored, so each block row is first
  mapped to the global numbering and filtered by L2GFilterUpperTriangular(); as a
  consequence preallocation and insertion use global indices, not the Local variants.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* full stencil width in grid points */

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundary keeps the full stencil width; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; /* BOX or on the STAR */
        }
      }
      /* map slot and cols to global block indices and drop the lower triangle */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per possible stencil entry */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are global after filtering, hence MatSetValuesBlocked (not ...Local) */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1989
/*
  Preallocates, and fills with zeros, the Jacobian matrix J for a 3d DMDA in (MPI)SBAIJ
  symmetric block format.  Only the upper triangle is stored, so each block row is first
  mapped to the global numbering and filtered by L2GFilterUpperTriangular(); as a
  consequence preallocation and insertion use global indices, not the Local variants.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1; /* full stencil width in grid points */

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols)); /* block columns only: at most col^3 per block row */

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundary keeps the full stencil width; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of this grid point */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              /* BOX keeps everything; STAR keeps entries on a coordinate axis through the center */
              if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* map slot and cols to global block indices and drop the lower triangle */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per possible stencil entry */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are global after filtering, hence MatSetValuesBlocked (not ...Local) */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2095
/*
  Preallocates, and fills with zeros, the Jacobian matrix J for a 3d DMDA in (MPI)AIJ
  format, honoring the user-supplied component-coupling masks set by DMDASetBlockFills():
  dd->dfill describes which components couple within a grid point (the "diagonal" block)
  and dd->ofill which couple to neighboring grid points, both in the modified CSR layout
  built by DMDASetBlockFills_Private() (fill[0..nc] are row starts, entries are columns).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1; /* full stencil width in grid points */

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* worst case per (point) row: nc coupled components for each of col^3 stencil neighbors */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundary keeps the full stencil width; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of this grid point */

        /* one matrix row per component l of this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) { /* off-point coupling: use the ofill mask */
                  if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else { /* same grid point: use the dfill mask, or dense coupling if none given */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* track the widest row; sizes the values buffer below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(maxcnt, &values)); /* zeros for the widest possible row */
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* regenerate exactly the same column pattern as in the counting pass above */
          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2236