xref: /petsc/src/dm/impls/da/fdda.c (revision 2286efddd54511ab18e8e2adb1e023c4bf8f0b92)
1 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
2 #include <petscmat.h>
3 #include <petscbt.h>
4 
5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
9 
10 /*
11    For ghost i that may be negative or greater than the upper bound this
12   maps it into the 0:m-1 range using periodicity
13 */
14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
15 
DMDASetBlockFills_Private(const PetscInt * dfill,PetscInt w,PetscInt ** rfill)16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
17 {
18   PetscInt i, j, nz, *fill;
19 
20   PetscFunctionBegin;
21   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
22 
23   /* count number nonzeros */
24   nz = 0;
25   for (i = 0; i < w; i++) {
26     for (j = 0; j < w; j++) {
27       if (dfill[w * i + j]) nz++;
28     }
29   }
30   PetscCall(PetscMalloc1(nz + w + 1, &fill));
31   /* construct modified CSR storage of nonzero structure */
32   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
33    so fill[1] - fill[0] gives number of nonzeros in first row etc */
34   nz = w + 1;
35   for (i = 0; i < w; i++) {
36     fill[i] = nz;
37     for (j = 0; j < w; j++) {
38       if (dfill[w * i + j]) {
39         fill[nz] = j;
40         nz++;
41       }
42     }
43   }
44   fill[w] = nz;
45 
46   *rfill = fill;
47   PetscFunctionReturn(PETSC_SUCCESS);
48 }
49 
DMDASetBlockFillsSparse_Private(const PetscInt * dfillsparse,PetscInt w,PetscInt ** rfill)50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
51 {
52   PetscInt nz;
53 
54   PetscFunctionBegin;
55   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
56 
57   /* Determine number of non-zeros */
58   nz = (dfillsparse[w] - w - 1);
59 
60   /* Allocate space for our copy of the given sparse matrix representation. */
61   PetscCall(PetscMalloc1(nz + w + 1, rfill));
62   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
63   PetscFunctionReturn(PETSC_SUCCESS);
64 }
65 
DMDASetBlockFills_Private2(DM_DA * dd)66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
67 {
68   PetscInt i, k, cnt = 1;
69 
70   PetscFunctionBegin;
71   /* ofillcount tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
72    columns to the left with any nonzeros in them plus 1 */
73   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
74   for (i = 0; i < dd->w; i++) {
75     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
76   }
77   for (i = 0; i < dd->w; i++) {
78     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
79   }
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 /*@
84   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
85   of the matrix returned by `DMCreateMatrix()`.
86 
87   Logically Collective
88 
89   Input Parameters:
90 + da    - the `DMDA`
91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
92 - ofill - the fill pattern in the off-diagonal blocks
93 
94   Level: developer
95 
96   Notes:
97   This only makes sense when you are doing multicomponent problems but using the
98   `MATMPIAIJ` matrix format
99 
100   The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101   representing coupling and 0 entries for missing coupling. For example
102 .vb
103             dfill[9] = {1, 0, 0,
104                         1, 1, 0,
105                         0, 1, 1}
106 .ve
107   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109   diagonal block).
110 
111   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
112   can be represented in the `dfill`, `ofill` format
113 
114   Contributed by\:
115   Glenn Hammond
116 
117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118 @*/
DMDASetBlockFills(DM da,const PetscInt * dfill,const PetscInt * ofill)119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120 {
121   DM_DA *dd = (DM_DA *)da->data;
122 
123   PetscFunctionBegin;
124   /* save the given dfill and ofill information */
125   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
127 
128   /* count nonzeros in ofill columns */
129   PetscCall(DMDASetBlockFills_Private2(dd));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 /*@
134   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136   of fill patterns.
137 
138   Logically Collective
139 
140   Input Parameters:
141 + da          - the `DMDA`
142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks
144 
145   Level: developer
146 
147   Notes:
148   This only makes sense when you are doing multicomponent problems but using the
149   `MATMPIAIJ` matrix format
150 
151   The format for `dfill` and `ofill` is a sparse representation of a
152   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153   for missing coupling.  The sparse representation is a 1 dimensional
154   array of length nz + dof + 1, where nz is the number of non-zeros in
155   the matrix.  The first dof entries in the array give the
156   starting array indices of each row's items in the rest of the array,
157   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158   and the remaining nz items give the column indices of each of
159   the 1s within the logical 2D matrix.  Each row's items within
160   the array are the column indices of the 1s within that row
161   of the 2D matrix.  PETSc developers may recognize that this is the
162   same format as that computed by the `DMDASetBlockFills_Private()`
163   function from a dense 2D matrix representation.
164 
165   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
166   can be represented in the `dfill`, `ofill` format
167 
168   Contributed by\:
169   Philip C. Roth
170 
171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
DMDASetBlockFillsSparse(DM da,const PetscInt * dfillsparse,const PetscInt * ofillsparse)173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174 {
175   DM_DA *dd = (DM_DA *)da->data;
176 
177   PetscFunctionBegin;
178   /* save the given dfill and ofill information */
179   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
181 
182   /* count nonzeros in ofill columns */
183   PetscCall(DMDASetBlockFills_Private2(dd));
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring * coloring)187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188 {
189   PetscInt       dim, m, n, p, nc;
190   DMBoundaryType bx, by, bz;
191   MPI_Comm       comm;
192   PetscMPIInt    size;
193   PetscBool      isBAIJ;
194   DM_DA         *dd = (DM_DA *)da->data;
195 
196   PetscFunctionBegin;
197   /*
198                                   m
199           ------------------------------------------------------
200          |                                                     |
201          |                                                     |
202          |               ----------------------                |
203          |               |                    |                |
204       n  |           yn  |                    |                |
205          |               |                    |                |
206          |               .---------------------                |
207          |             (xs,ys)     xn                          |
208          |            .                                        |
209          |         (gxs,gys)                                   |
210          |                                                     |
211           -----------------------------------------------------
212   */
213 
214   /*
215          nc - number of components per grid point
216          col - number of colors needed in one direction for single component problem
217 
218   */
219   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
220 
221   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222   PetscCallMPI(MPI_Comm_size(comm, &size));
223   if (ctype == IS_COLORING_LOCAL) {
224     PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both sides of the domain on the same process");
225   }
226 
227   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
228      matrices is for the blocks, not the individual matrix elements  */
229   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
230   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
231   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
232   if (isBAIJ) {
233     dd->w  = 1;
234     dd->xs = dd->xs / nc;
235     dd->xe = dd->xe / nc;
236     dd->Xs = dd->Xs / nc;
237     dd->Xe = dd->Xe / nc;
238   }
239 
240   /*
241      We do not provide a getcoloring function in the DMDA operations because
242    the basic DMDA does not know about matrices. We think of DMDA as being
243    more low-level then matrices.
244   */
245   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
246   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
247   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
248   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
249   if (isBAIJ) {
250     dd->w  = nc;
251     dd->xs = dd->xs * nc;
252     dd->xe = dd->xe * nc;
253     dd->Xs = dd->Xs * nc;
254     dd->Xe = dd->Xe * nc;
255   }
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 
DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring * coloring)259 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
260 {
261   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
262   PetscInt         ncolors = 0;
263   MPI_Comm         comm;
264   DMBoundaryType   bx, by;
265   DMDAStencilType  st;
266   ISColoringValue *colors;
267   DM_DA           *dd = (DM_DA *)da->data;
268 
269   PetscFunctionBegin;
270   /*
271          nc - number of components per grid point
272          col - number of colors needed in one direction for single component problem
273 
274   */
275   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
276   col = 2 * s + 1;
277   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
278   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
279   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
280 
281   /* special case as taught to us by Paul Hovland */
282   if (st == DMDA_STENCIL_STAR && s == 1) {
283     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
284   } else {
285     if (ctype == IS_COLORING_GLOBAL) {
286       if (!dd->localcoloring) {
287         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
288         ii = 0;
289         for (j = ys; j < ys + ny; j++) {
290           for (i = xs; i < xs + nx; i++) {
291             for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((i % col) + col * (j % col)), colors + ii++));
292           }
293         }
294         ncolors = nc + nc * (col - 1 + col * (col - 1));
295         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
296       }
297       *coloring = dd->localcoloring;
298     } else if (ctype == IS_COLORING_LOCAL) {
299       if (!dd->ghostedcoloring) {
300         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
301         ii = 0;
302         for (j = gys; j < gys + gny; j++) {
303           for (i = gxs; i < gxs + gnx; i++) {
304             for (k = 0; k < nc; k++) {
305               /* the complicated stuff is to handle periodic boundaries */
306               PetscCall(ISColoringValueCast(k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)), colors + ii++));
307             }
308           }
309         }
310         ncolors = nc + nc * (col - 1 + col * (col - 1));
311         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
312         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
313 
314         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
315       }
316       *coloring = dd->ghostedcoloring;
317     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318   }
319   PetscCall(ISColoringReference(*coloring));
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring * coloring)323 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
324 {
325   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
326   PetscInt         ncolors;
327   MPI_Comm         comm;
328   DMBoundaryType   bx, by, bz;
329   DMDAStencilType  st;
330   ISColoringValue *colors;
331   DM_DA           *dd = (DM_DA *)da->data;
332 
333   PetscFunctionBegin;
334   /*
335          nc - number of components per grid point
336          col - number of colors needed in one direction for single component problem
337   */
338   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
339   col = 2 * s + 1;
340   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
341   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
342   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");
343 
344   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
345   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
346   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
347 
348   /* create the coloring */
349   if (ctype == IS_COLORING_GLOBAL) {
350     if (!dd->localcoloring) {
351       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
352       ii = 0;
353       for (k = zs; k < zs + nz; k++) {
354         for (j = ys; j < ys + ny; j++) {
355           for (i = xs; i < xs + nx; i++) {
356             for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * ((i % col) + col * (j % col) + col * col * (k % col)), colors + ii++));
357           }
358         }
359       }
360       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
361       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
362     }
363     *coloring = dd->localcoloring;
364   } else if (ctype == IS_COLORING_LOCAL) {
365     if (!dd->ghostedcoloring) {
366       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
367       ii = 0;
368       for (k = gzs; k < gzs + gnz; k++) {
369         for (j = gys; j < gys + gny; j++) {
370           for (i = gxs; i < gxs + gnx; i++) {
371             for (l = 0; l < nc; l++) {
372               /* the complicated stuff is to handle periodic boundaries */
373               PetscCall(ISColoringValueCast(l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)), colors + ii++));
374             }
375           }
376         }
377       }
378       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
379       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
380       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
381     }
382     *coloring = dd->ghostedcoloring;
383   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
384   PetscCall(ISColoringReference(*coloring));
385   PetscFunctionReturn(PETSC_SUCCESS);
386 }
387 
DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring * coloring)388 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
389 {
390   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
391   PetscInt         ncolors;
392   MPI_Comm         comm;
393   DMBoundaryType   bx;
394   ISColoringValue *colors;
395   DM_DA           *dd = (DM_DA *)da->data;
396 
397   PetscFunctionBegin;
398   /*
399          nc - number of components per grid point
400          col - number of colors needed in one direction for single component problem
401   */
402   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
403   col = 2 * s + 1;
404   PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col);
405   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
406   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
407   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
408 
409   /* create the coloring */
410   if (ctype == IS_COLORING_GLOBAL) {
411     if (!dd->localcoloring) {
412       PetscCall(PetscMalloc1(nc * nx, &colors));
413       if (dd->ofillcols) {
414         PetscInt tc = 0;
415         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
416         i1 = 0;
417         for (i = xs; i < xs + nx; i++) {
418           for (l = 0; l < nc; l++) {
419             if (dd->ofillcols[l] && (i % col)) {
420               PetscCall(ISColoringValueCast(nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l], colors + i1++));
421             } else {
422               PetscCall(ISColoringValueCast(l, colors + i1++));
423             }
424           }
425         }
426         ncolors = nc + 2 * s * tc;
427       } else {
428         i1 = 0;
429         for (i = xs; i < xs + nx; i++) {
430           for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * (i % col), colors + i1++));
431         }
432         ncolors = nc + nc * (col - 1);
433       }
434       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
435     }
436     *coloring = dd->localcoloring;
437   } else if (ctype == IS_COLORING_LOCAL) {
438     if (!dd->ghostedcoloring) {
439       PetscCall(PetscMalloc1(nc * gnx, &colors));
440       i1 = 0;
441       for (i = gxs; i < gxs + gnx; i++) {
442         for (l = 0; l < nc; l++) {
443           /* the complicated stuff is to handle periodic boundaries */
444           PetscCall(ISColoringValueCast(l + nc * (SetInRange(i, m) % col), colors + i1++));
445         }
446       }
447       ncolors = nc + nc * (col - 1);
448       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
449       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
450     }
451     *coloring = dd->ghostedcoloring;
452   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
453   PetscCall(ISColoringReference(*coloring));
454   PetscFunctionReturn(PETSC_SUCCESS);
455 }
456 
DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring * coloring)457 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
458 {
459   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
460   PetscInt         ncolors;
461   MPI_Comm         comm;
462   DMBoundaryType   bx, by;
463   ISColoringValue *colors;
464   DM_DA           *dd = (DM_DA *)da->data;
465 
466   PetscFunctionBegin;
467   /*
468          nc - number of components per grid point
469          col - number of colors needed in one direction for single component problem
470   */
471   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
472   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
473   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
474   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
475   /* create the coloring */
476   if (ctype == IS_COLORING_GLOBAL) {
477     if (!dd->localcoloring) {
478       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
479       ii = 0;
480       for (j = ys; j < ys + ny; j++) {
481         for (i = xs; i < xs + nx; i++) {
482           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * j + i) % 5), colors + ii++));
483         }
484       }
485       ncolors = 5 * nc;
486       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
487     }
488     *coloring = dd->localcoloring;
489   } else if (ctype == IS_COLORING_LOCAL) {
490     if (!dd->ghostedcoloring) {
491       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
492       ii = 0;
493       for (j = gys; j < gys + gny; j++) {
494         for (i = gxs; i < gxs + gnx; i++) {
495           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5), colors + ii++));
496         }
497       }
498       ncolors = 5 * nc;
499       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
500       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
501     }
502     *coloring = dd->ghostedcoloring;
503   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
507 /* =========================================================================== */
508 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
509 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
510 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
511 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
512 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
513 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
514 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
522 
MatView_MPI_DA(Mat A,PetscViewer viewer)523 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
524 {
525   DM                da;
526   const char       *prefix;
527   Mat               AA, Anatural;
528   AO                ao;
529   PetscInt          rstart, rend, *petsc, i;
530   IS                is;
531   MPI_Comm          comm;
532   PetscViewerFormat format;
533   PetscBool         flag;
534 
535   PetscFunctionBegin;
536   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
537   PetscCall(PetscViewerGetFormat(viewer, &format));
538   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
539 
540   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
541   PetscCall(MatGetDM(A, &da));
542   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
543 
544   PetscCall(DMDAGetAO(da, &ao));
545   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
546   PetscCall(PetscMalloc1(rend - rstart, &petsc));
547   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
548   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
549   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
550 
551   /* call viewer on natural ordering */
552   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
553   if (flag) {
554     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
555     A = AA;
556   }
557   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
558   PetscCall(ISDestroy(&is));
559   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
560   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
561   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
562   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
563   PetscCall(MatView(Anatural, viewer));
564   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
565   PetscCall(MatDestroy(&Anatural));
566   if (flag) PetscCall(MatDestroy(&AA));
567   PetscFunctionReturn(PETSC_SUCCESS);
568 }
569 
MatLoad_MPI_DA(Mat A,PetscViewer viewer)570 static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
571 {
572   DM       da;
573   Mat      Anatural, Aapp;
574   AO       ao;
575   PetscInt rstart, rend, *app, i, m, n, M, N;
576   IS       is;
577   MPI_Comm comm;
578 
579   PetscFunctionBegin;
580   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
581   PetscCall(MatGetDM(A, &da));
582   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
583 
584   /* Load the matrix in natural ordering */
585   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
586   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
587   PetscCall(MatGetSize(A, &M, &N));
588   PetscCall(MatGetLocalSize(A, &m, &n));
589   PetscCall(MatSetSizes(Anatural, m, n, M, N));
590   PetscCall(MatLoad(Anatural, viewer));
591 
592   /* Map natural ordering to application ordering and create IS */
593   PetscCall(DMDAGetAO(da, &ao));
594   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
595   PetscCall(PetscMalloc1(rend - rstart, &app));
596   for (i = rstart; i < rend; i++) app[i - rstart] = i;
597   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
598   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
599 
600   /* Do permutation and replace header */
601   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
602   PetscCall(MatHeaderReplace(A, &Aapp));
603   PetscCall(ISDestroy(&is));
604   PetscCall(MatDestroy(&Anatural));
605   PetscFunctionReturn(PETSC_SUCCESS);
606 }
607 
DMCreateMatrix_DA(DM da,Mat * J)608 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
609 {
610   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
611   Mat         A;
612   MPI_Comm    comm;
613   MatType     Atype;
614   MatType     mtype;
615   PetscMPIInt size;
616   DM_DA      *dd  = (DM_DA *)da->data;
617   PetscBool   aij = PETSC_FALSE, baij = PETSC_FALSE, sbaij = PETSC_FALSE, sell = PETSC_FALSE, is = PETSC_FALSE;
618 
619   PetscFunctionBegin;
620   PetscCall(MatInitializePackage());
621   mtype = da->mattype;
622 
623   /*
624                                   m
625           ------------------------------------------------------
626          |                                                     |
627          |                                                     |
628          |               ----------------------                |
629          |               |                    |                |
630       n  |           ny  |                    |                |
631          |               |                    |                |
632          |               .---------------------                |
633          |             (xs,ys)     nx                          |
634          |            .                                        |
635          |         (gxs,gys)                                   |
636          |                                                     |
637           -----------------------------------------------------
638   */
639 
640   /*
641          nc - number of components per grid point
642          col - number of colors needed in one direction for single component problem
643   */
644   M   = dd->M;
645   N   = dd->N;
646   P   = dd->P;
647   dim = da->dim;
648   dof = dd->w;
649   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
650   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
651   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
652   PetscCall(MatCreate(comm, &A));
653   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
654   PetscCall(MatSetType(A, mtype));
655   PetscCall(MatSetFromOptions(A));
656   if (dof * nx * ny * nz < da->bind_below) {
657     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
658     PetscCall(MatBindToCPU(A, PETSC_TRUE));
659   }
660   PetscCall(MatSetDM(A, da));
661   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
662   PetscCall(MatGetType(A, &Atype));
663   /*
664      We do not provide a getmatrix function in the DMDA operations because
665    the basic DMDA does not know about matrices. We think of DMDA as being more
666    more low-level than matrices. This is kind of cheating but, cause sometimes
667    we think of DMDA has higher level than matrices.
668 
669      We could switch based on Atype (or mtype), but we do not since the
670    specialized setting routines depend only on the particular preallocation
671    details of the matrix, not the type itself.
672   */
673   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
674     PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
675     if (!aij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
676     if (!aij) {
677       PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
678       if (!baij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
679       if (!baij) {
680         PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
681         if (!sbaij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
682         if (!sbaij) {
683           PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
684           if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
685         }
686         if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
687       }
688     }
689   }
690   if (aij) {
691     if (dim == 1) {
692       if (dd->ofill) {
693         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
694       } else {
695         DMBoundaryType bx;
696         PetscMPIInt    size;
697         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
698         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
699         if (size == 1 && bx == DM_BOUNDARY_NONE) {
700           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
701         } else {
702           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
703         }
704       }
705     } else if (dim == 2) {
706       if (dd->ofill) {
707         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
708       } else {
709         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
710       }
711     } else if (dim == 3) {
712       if (dd->ofill) {
713         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
714       } else {
715         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
716       }
717     }
718   } else if (baij) {
719     if (dim == 2) {
720       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
721     } else if (dim == 3) {
722       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
723     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
724   } else if (sbaij) {
725     if (dim == 2) {
726       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
727     } else if (dim == 3) {
728       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
729     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
730   } else if (sell) {
731     if (dim == 2) {
732       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
733     } else if (dim == 3) {
734       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
735     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
736   } else if (is) {
737     PetscCall(DMCreateMatrix_DA_IS(da, A));
738   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
739     ISLocalToGlobalMapping ltog;
740 
741     PetscCall(MatSetBlockSize(A, dof));
742     PetscCall(MatSetUp(A));
743     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
744     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
745   }
746   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
747   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
748   PetscCall(MatSetDM(A, da));
749   PetscCallMPI(MPI_Comm_size(comm, &size));
750   if (size > 1) {
751     /* change viewer to display matrix in natural ordering */
752     PetscCall(MatSetOperation(A, MATOP_VIEW, (PetscErrorCodeFn *)MatView_MPI_DA));
753     PetscCall(MatSetOperation(A, MATOP_LOAD, (PetscErrorCodeFn *)MatLoad_MPI_DA));
754   }
755   *J = A;
756   PetscFunctionReturn(PETSC_SUCCESS);
757 }
758 
DMCreateMatrix_DA_IS(DM dm,Mat J)759 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
760 {
761   DM_DA                 *da = (DM_DA *)dm->data;
762   Mat                    lJ, P;
763   ISLocalToGlobalMapping ltog;
764   IS                     is;
765   PetscBT                bt;
766   const PetscInt        *e_loc, *idx;
767   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
768 
769   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
770      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
771   PetscFunctionBegin;
772   dof = da->w;
773   PetscCall(MatSetBlockSize(J, dof));
774   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
775 
776   /* flag local elements indices in local DMDA numbering */
777   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
778   PetscCall(PetscBTCreate(nv / dof, &bt));
779   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
780   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
781 
782   /* filter out (set to -1) the global indices not used by the local elements */
783   PetscCall(PetscMalloc1(nv / dof, &gidx));
784   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
785   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
786   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
787   for (i = 0; i < nv / dof; i++)
788     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
789   PetscCall(PetscBTDestroy(&bt));
790   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
791   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
792   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
793   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
794   PetscCall(ISDestroy(&is));
795 
796   /* Preallocation */
797   if (dm->prealloc_skip) {
798     PetscCall(MatSetUp(J));
799   } else {
800     PetscCall(MatISGetLocalMat(J, &lJ));
801     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
802     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
803     PetscCall(MatSetType(P, MATPREALLOCATOR));
804     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
805     PetscCall(MatGetSize(lJ, &N, NULL));
806     PetscCall(MatGetLocalSize(lJ, &n, NULL));
807     PetscCall(MatSetSizes(P, n, n, N, N));
808     PetscCall(MatSetBlockSize(P, dof));
809     PetscCall(MatSetUp(P));
810     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
811     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
812     PetscCall(MatISRestoreLocalMat(J, &lJ));
813     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
814     PetscCall(MatDestroy(&P));
815 
816     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
817     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
818   }
819   PetscFunctionReturn(PETSC_SUCCESS);
820 }
821 
DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)822 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
823 {
824   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
825   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
826   MPI_Comm               comm;
827   PetscScalar           *values;
828   DMBoundaryType         bx, by;
829   ISLocalToGlobalMapping ltog;
830   DMDAStencilType        st;
831 
832   PetscFunctionBegin;
833   /*
834          nc - number of components per grid point
835          col - number of colors needed in one direction for single component problem
836   */
837   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
838   col = 2 * s + 1;
839   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
840   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
841   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
842 
843   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
844   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
845 
846   PetscCall(MatSetBlockSize(J, nc));
847   /* determine the matrix preallocation information */
848   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
849   for (i = xs; i < xs + nx; i++) {
850     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
851     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
852 
853     for (j = ys; j < ys + ny; j++) {
854       slot = i - gxs + gnx * (j - gys);
855 
856       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
857       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
858 
859       cnt = 0;
860       for (k = 0; k < nc; k++) {
861         for (l = lstart; l < lend + 1; l++) {
862           for (p = pstart; p < pend + 1; p++) {
863             if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
864               cols[cnt++] = k + nc * (slot + gnx * l + p);
865             }
866           }
867         }
868         rows[k] = k + nc * (slot);
869       }
870       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
871     }
872   }
873   PetscCall(MatSetBlockSize(J, nc));
874   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
875   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
876   MatPreallocateEnd(dnz, onz);
877 
878   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
879 
880   /*
881     For each node in the grid: we get the neighbors in the local (on processor ordering
882     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
883     PETSc ordering.
884   */
885   if (!da->prealloc_only) {
886     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
887     for (i = xs; i < xs + nx; i++) {
888       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
889       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
890 
891       for (j = ys; j < ys + ny; j++) {
892         slot = i - gxs + gnx * (j - gys);
893 
894         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
895         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
896 
897         cnt = 0;
898         for (k = 0; k < nc; k++) {
899           for (l = lstart; l < lend + 1; l++) {
900             for (p = pstart; p < pend + 1; p++) {
901               if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
902                 cols[cnt++] = k + nc * (slot + gnx * l + p);
903               }
904             }
905           }
906           rows[k] = k + nc * (slot);
907         }
908         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
909       }
910     }
911     PetscCall(PetscFree(values));
912     /* do not copy values to GPU since they are all zero and not yet needed there */
913     PetscCall(MatBindToCPU(J, PETSC_TRUE));
914     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
915     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
916     PetscCall(MatBindToCPU(J, PETSC_FALSE));
917     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
918   }
919   PetscCall(PetscFree2(rows, cols));
920   PetscFunctionReturn(PETSC_SUCCESS);
921 }
922 
DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)923 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
924 {
925   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
926   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
927   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
928   MPI_Comm               comm;
929   PetscScalar           *values;
930   DMBoundaryType         bx, by, bz;
931   ISLocalToGlobalMapping ltog;
932   DMDAStencilType        st;
933 
934   PetscFunctionBegin;
935   /*
936          nc - number of components per grid point
937          col - number of colors needed in one direction for single component problem
938   */
939   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
940   col = 2 * s + 1;
941   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
942   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
943   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
944 
945   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
946   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
947 
948   PetscCall(MatSetBlockSize(J, nc));
949   /* determine the matrix preallocation information */
950   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
951   for (i = xs; i < xs + nx; i++) {
952     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
953     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
954     for (j = ys; j < ys + ny; j++) {
955       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
956       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
957       for (k = zs; k < zs + nz; k++) {
958         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
959         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
960 
961         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
962 
963         cnt = 0;
964         for (l = 0; l < nc; l++) {
965           for (ii = istart; ii < iend + 1; ii++) {
966             for (jj = jstart; jj < jend + 1; jj++) {
967               for (kk = kstart; kk < kend + 1; kk++) {
968                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
969                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
970                 }
971               }
972             }
973           }
974           rows[l] = l + nc * (slot);
975         }
976         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
977       }
978     }
979   }
980   PetscCall(MatSetBlockSize(J, nc));
981   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
982   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
983   MatPreallocateEnd(dnz, onz);
984   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
985 
986   /*
987     For each node in the grid: we get the neighbors in the local (on processor ordering
988     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
989     PETSc ordering.
990   */
991   if (!da->prealloc_only) {
992     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
993     for (i = xs; i < xs + nx; i++) {
994       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
995       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
996       for (j = ys; j < ys + ny; j++) {
997         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
998         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
999         for (k = zs; k < zs + nz; k++) {
1000           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1001           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1002 
1003           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1004 
1005           cnt = 0;
1006           for (l = 0; l < nc; l++) {
1007             for (ii = istart; ii < iend + 1; ii++) {
1008               for (jj = jstart; jj < jend + 1; jj++) {
1009                 for (kk = kstart; kk < kend + 1; kk++) {
1010                   if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1011                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1012                   }
1013                 }
1014               }
1015             }
1016             rows[l] = l + nc * (slot);
1017           }
1018           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1019         }
1020       }
1021     }
1022     PetscCall(PetscFree(values));
1023     /* do not copy values to GPU since they are all zero and not yet needed there */
1024     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1025     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1026     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1027     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1028     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1029   }
1030   PetscCall(PetscFree2(rows, cols));
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033 
DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)1034 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1035 {
1036   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1037   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1038   MPI_Comm               comm;
1039   DMBoundaryType         bx, by;
1040   ISLocalToGlobalMapping ltog, mltog;
1041   DMDAStencilType        st;
1042   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1043 
1044   PetscFunctionBegin;
1045   /*
1046          nc - number of components per grid point
1047          col - number of colors needed in one direction for single component problem
1048   */
1049   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1050   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1051   col = 2 * s + 1;
1052   /*
1053        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1054        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1055   */
1056   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1057   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1058   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1059   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1060   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1061 
1062   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1063   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1064 
1065   PetscCall(MatSetBlockSize(J, nc));
1066   /* determine the matrix preallocation information */
1067   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1068   for (i = xs; i < xs + nx; i++) {
1069     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1070     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1071 
1072     for (j = ys; j < ys + ny; j++) {
1073       slot = i - gxs + gnx * (j - gys);
1074 
1075       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1076       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1077 
1078       cnt = 0;
1079       for (k = 0; k < nc; k++) {
1080         for (l = lstart; l < lend + 1; l++) {
1081           for (p = pstart; p < pend + 1; p++) {
1082             if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
1083               cols[cnt++] = k + nc * (slot + gnx * l + p);
1084             }
1085           }
1086         }
1087         rows[k] = k + nc * (slot);
1088       }
1089       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1090       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1091     }
1092   }
1093   PetscCall(MatSetBlockSize(J, nc));
1094   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1095   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1096   MatPreallocateEnd(dnz, onz);
1097   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1098   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1099 
1100   /*
1101     For each node in the grid: we get the neighbors in the local (on processor ordering
1102     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1103     PETSc ordering.
1104   */
1105   if (!da->prealloc_only) {
1106     for (i = xs; i < xs + nx; i++) {
1107       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1108       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1109 
1110       for (j = ys; j < ys + ny; j++) {
1111         slot = i - gxs + gnx * (j - gys);
1112 
1113         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1114         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1115 
1116         cnt = 0;
1117         for (l = lstart; l < lend + 1; l++) {
1118           for (p = pstart; p < pend + 1; p++) {
1119             if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
1120               cols[cnt++] = nc * (slot + gnx * l + p);
1121               for (k = 1; k < nc; k++) {
1122                 cols[cnt] = 1 + cols[cnt - 1];
1123                 cnt++;
1124               }
1125             }
1126           }
1127         }
1128         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1129         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1130       }
1131     }
1132     /* do not copy values to GPU since they are all zero and not yet needed there */
1133     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1134     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1135     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1136     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1137     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1138     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1139     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1140   }
1141   PetscCall(PetscFree2(rows, cols));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)1145 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1146 {
1147   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1148   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1149   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1150   DM_DA                 *dd = (DM_DA *)da->data;
1151   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1152   MPI_Comm               comm;
1153   DMBoundaryType         bx, by;
1154   ISLocalToGlobalMapping ltog;
1155   DMDAStencilType        st;
1156   PetscBool              removedups = PETSC_FALSE;
1157 
1158   PetscFunctionBegin;
1159   /*
1160          nc - number of components per grid point
1161          col - number of colors needed in one direction for single component problem
1162   */
1163   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1164   col = 2 * s + 1;
1165   /*
1166        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1167        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1168   */
1169   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1170   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1171   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1172   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1173   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1174 
1175   PetscCall(PetscMalloc1(col * col * nc, &cols));
1176   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1177 
1178   PetscCall(MatSetBlockSize(J, nc));
1179   /* determine the matrix preallocation information */
1180   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1181   for (i = xs; i < xs + nx; i++) {
1182     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1183     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1184 
1185     for (j = ys; j < ys + ny; j++) {
1186       slot = i - gxs + gnx * (j - gys);
1187 
1188       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1189       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1190 
1191       for (k = 0; k < nc; k++) {
1192         cnt = 0;
1193         for (l = lstart; l < lend + 1; l++) {
1194           for (p = pstart; p < pend + 1; p++) {
1195             if (l || p) {
1196               if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */
1197                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1198               }
1199             } else {
1200               if (dfill) {
1201                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1202               } else {
1203                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1204               }
1205             }
1206           }
1207         }
1208         row    = k + nc * (slot);
1209         maxcnt = PetscMax(maxcnt, cnt);
1210         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1211         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1212       }
1213     }
1214   }
1215   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1216   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1217   MatPreallocateEnd(dnz, onz);
1218   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1219 
1220   /*
1221     For each node in the grid: we get the neighbors in the local (on processor ordering
1222     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1223     PETSc ordering.
1224   */
1225   if (!da->prealloc_only) {
1226     for (i = xs; i < xs + nx; i++) {
1227       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1228       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1229 
1230       for (j = ys; j < ys + ny; j++) {
1231         slot = i - gxs + gnx * (j - gys);
1232 
1233         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1234         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1235 
1236         for (k = 0; k < nc; k++) {
1237           cnt = 0;
1238           for (l = lstart; l < lend + 1; l++) {
1239             for (p = pstart; p < pend + 1; p++) {
1240               if (l || p) {
1241                 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */
1242                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1243                 }
1244               } else {
1245                 if (dfill) {
1246                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1247                 } else {
1248                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1249                 }
1250               }
1251             }
1252           }
1253           row = k + nc * (slot);
1254           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1255         }
1256       }
1257     }
1258     /* do not copy values to GPU since they are all zero and not yet needed there */
1259     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1260     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1261     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1262     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1263     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1264   }
1265   PetscCall(PetscFree(cols));
1266   PetscFunctionReturn(PETSC_SUCCESS);
1267 }
1268 
DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)1269 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1270 {
1271   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1272   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1273   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1274   MPI_Comm               comm;
1275   DMBoundaryType         bx, by, bz;
1276   ISLocalToGlobalMapping ltog, mltog;
1277   DMDAStencilType        st;
1278   PetscBool              removedups = PETSC_FALSE;
1279 
1280   PetscFunctionBegin;
1281   /*
1282          nc - number of components per grid point
1283          col - number of colors needed in one direction for single component problem
1284   */
1285   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1286   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1287   col = 2 * s + 1;
1288 
1289   /*
1290        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1291        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1292   */
1293   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1294   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1295   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1296 
1297   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1298   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1299   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1300 
1301   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1302   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1303 
1304   PetscCall(MatSetBlockSize(J, nc));
1305   /* determine the matrix preallocation information */
1306   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1307   for (i = xs; i < xs + nx; i++) {
1308     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1309     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1310     for (j = ys; j < ys + ny; j++) {
1311       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1312       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1313       for (k = zs; k < zs + nz; k++) {
1314         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1315         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1316 
1317         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1318 
1319         cnt = 0;
1320         for (l = 0; l < nc; l++) {
1321           for (ii = istart; ii < iend + 1; ii++) {
1322             for (jj = jstart; jj < jend + 1; jj++) {
1323               for (kk = kstart; kk < kend + 1; kk++) {
1324                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1325                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1326                 }
1327               }
1328             }
1329           }
1330           rows[l] = l + nc * (slot);
1331         }
1332         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1333         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1334       }
1335     }
1336   }
1337   PetscCall(MatSetBlockSize(J, nc));
1338   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1339   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1340   MatPreallocateEnd(dnz, onz);
1341   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1342   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1343 
1344   /*
1345     For each node in the grid: we get the neighbors in the local (on processor ordering
1346     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1347     PETSc ordering.
1348   */
1349   if (!da->prealloc_only) {
1350     for (i = xs; i < xs + nx; i++) {
1351       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1352       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1353       for (j = ys; j < ys + ny; j++) {
1354         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1355         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1356         for (k = zs; k < zs + nz; k++) {
1357           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1358           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1359 
1360           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1361 
1362           cnt = 0;
1363           for (kk = kstart; kk < kend + 1; kk++) {
1364             for (jj = jstart; jj < jend + 1; jj++) {
1365               for (ii = istart; ii < iend + 1; ii++) {
1366                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1367                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1368                   for (l = 1; l < nc; l++) {
1369                     cols[cnt] = 1 + cols[cnt - 1];
1370                     cnt++;
1371                   }
1372                 }
1373               }
1374             }
1375           }
1376           rows[0] = nc * (slot);
1377           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1378           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1379         }
1380       }
1381     }
1382     /* do not copy values to GPU since they are all zero and not yet needed there */
1383     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1384     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1385     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1386     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1387     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1388     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1389   }
1390   PetscCall(PetscFree2(rows, cols));
1391   PetscFunctionReturn(PETSC_SUCCESS);
1392 }
1393 
DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)1394 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1395 {
1396   DM_DA                 *dd = (DM_DA *)da->data;
1397   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1398   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1399   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1400   DMBoundaryType         bx;
1401   ISLocalToGlobalMapping ltog;
1402   PetscMPIInt            rank, size;
1403 
1404   PetscFunctionBegin;
1405   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1406   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1407 
1408   /*
1409          nc - number of components per grid point
1410   */
1411   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1412   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1413   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1414   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1415 
1416   PetscCall(MatSetBlockSize(J, nc));
1417   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1418 
1419   /*
1420         note should be smaller for first and last process with no periodic
1421         does not handle dfill
1422   */
1423   cnt = 0;
1424   /* coupling with process to the left */
1425   for (i = 0; i < s; i++) {
1426     for (j = 0; j < nc; j++) {
1427       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1428       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1429       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1430         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1431         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1432       }
1433       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1434       cnt++;
1435     }
1436   }
1437   for (i = s; i < nx - s; i++) {
1438     for (j = 0; j < nc; j++) {
1439       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1440       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1441       cnt++;
1442     }
1443   }
1444   /* coupling with process to the right */
1445   for (i = nx - s; i < nx; i++) {
1446     for (j = 0; j < nc; j++) {
1447       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1448       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1449       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1450         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1451         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1452       }
1453       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1454       cnt++;
1455     }
1456   }
1457 
1458   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1459   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1460   PetscCall(PetscFree2(cols, ocols));
1461 
1462   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1463   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1464 
1465   /*
1466     For each node in the grid: we get the neighbors in the local (on processor ordering
1467     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1468     PETSc ordering.
1469   */
1470   if (!da->prealloc_only) {
1471     PetscCall(PetscMalloc1(maxcnt, &cols));
1472     row = xs * nc;
1473     /* coupling with process to the left */
1474     for (i = xs; i < xs + s; i++) {
1475       for (j = 0; j < nc; j++) {
1476         cnt = 0;
1477         if (rank) {
1478           for (l = 0; l < s; l++) {
1479             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1480           }
1481         }
1482         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1483           for (l = 0; l < s; l++) {
1484             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1485           }
1486         }
1487         if (dfill) {
1488           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1489         } else {
1490           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1491         }
1492         for (l = 0; l < s; l++) {
1493           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1494         }
1495         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1496         row++;
1497       }
1498     }
1499     for (i = xs + s; i < xs + nx - s; i++) {
1500       for (j = 0; j < nc; j++) {
1501         cnt = 0;
1502         for (l = 0; l < s; l++) {
1503           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1504         }
1505         if (dfill) {
1506           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1507         } else {
1508           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1509         }
1510         for (l = 0; l < s; l++) {
1511           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1512         }
1513         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1514         row++;
1515       }
1516     }
1517     /* coupling with process to the right */
1518     for (i = xs + nx - s; i < xs + nx; i++) {
1519       for (j = 0; j < nc; j++) {
1520         cnt = 0;
1521         for (l = 0; l < s; l++) {
1522           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1523         }
1524         if (dfill) {
1525           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1526         } else {
1527           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1528         }
1529         if (rank < size - 1) {
1530           for (l = 0; l < s; l++) {
1531             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1532           }
1533         }
1534         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1535           for (l = 0; l < s; l++) {
1536             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1537           }
1538         }
1539         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1540         row++;
1541       }
1542     }
1543     PetscCall(PetscFree(cols));
1544     /* do not copy values to GPU since they are all zero and not yet needed there */
1545     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1546     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1547     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1548     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1549     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1550   }
1551   PetscFunctionReturn(PETSC_SUCCESS);
1552 }
1553 
DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)1554 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1555 {
1556   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1557   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1558   PetscInt               istart, iend;
1559   DMBoundaryType         bx;
1560   ISLocalToGlobalMapping ltog, mltog;
1561 
1562   PetscFunctionBegin;
1563   /*
1564          nc - number of components per grid point
1565          col - number of colors needed in one direction for single component problem
1566   */
1567   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1568   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1569   col = 2 * s + 1;
1570 
1571   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1572   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1573 
1574   PetscCall(MatSetBlockSize(J, nc));
1575   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1576   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1577 
1578   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1579   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1580   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1581 
1582   /*
1583     For each node in the grid: we get the neighbors in the local (on processor ordering
1584     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1585     PETSc ordering.
1586   */
1587   if (!da->prealloc_only) {
1588     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1589     for (i = xs; i < xs + nx; i++) {
1590       istart = PetscMax(-s, gxs - i);
1591       iend   = PetscMin(s, gxs + gnx - i - 1);
1592       slot   = i - gxs;
1593 
1594       cnt = 0;
1595       for (i1 = istart; i1 < iend + 1; i1++) {
1596         cols[cnt++] = nc * (slot + i1);
1597         for (l = 1; l < nc; l++) {
1598           cols[cnt] = 1 + cols[cnt - 1];
1599           cnt++;
1600         }
1601       }
1602       rows[0] = nc * (slot);
1603       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1604       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1605     }
1606     /* do not copy values to GPU since they are all zero and not yet needed there */
1607     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1608     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1609     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1610     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1611     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1612     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1613     PetscCall(PetscFree2(rows, cols));
1614   }
1615   PetscFunctionReturn(PETSC_SUCCESS);
1616 }
1617 
DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)1618 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1619 {
1620   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1621   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1622   PetscInt               istart, iend;
1623   DMBoundaryType         bx;
1624   ISLocalToGlobalMapping ltog, mltog;
1625 
1626   PetscFunctionBegin;
1627   /*
1628          nc - number of components per grid point
1629          col - number of colors needed in one direction for single component problem
1630   */
1631   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1632   col = 2 * s + 1;
1633 
1634   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1635   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1636 
1637   PetscCall(MatSetBlockSize(J, nc));
1638   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1639 
1640   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1641   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1642   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1643 
1644   /*
1645     For each node in the grid: we get the neighbors in the local (on processor ordering
1646     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1647     PETSc ordering.
1648   */
1649   if (!da->prealloc_only) {
1650     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1651     for (i = xs; i < xs + nx; i++) {
1652       istart = PetscMax(-s, gxs - i);
1653       iend   = PetscMin(s, gxs + gnx - i - 1);
1654       slot   = i - gxs;
1655 
1656       cnt = 0;
1657       for (i1 = istart; i1 < iend + 1; i1++) {
1658         cols[cnt++] = nc * (slot + i1);
1659         for (l = 1; l < nc; l++) {
1660           cols[cnt] = 1 + cols[cnt - 1];
1661           cnt++;
1662         }
1663       }
1664       rows[0] = nc * (slot);
1665       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1666       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1667     }
1668     /* do not copy values to GPU since they are all zero and not yet needed there */
1669     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1670     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1671     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1672     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1673     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1674     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1675     PetscCall(PetscFree2(rows, cols));
1676   }
1677   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1678   PetscFunctionReturn(PETSC_SUCCESS);
1679 }
1680 
DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)1681 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1682 {
1683   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1684   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1685   PetscInt               istart, iend, jstart, jend, ii, jj;
1686   MPI_Comm               comm;
1687   PetscScalar           *values;
1688   DMBoundaryType         bx, by;
1689   DMDAStencilType        st;
1690   ISLocalToGlobalMapping ltog;
1691 
1692   PetscFunctionBegin;
1693   /*
1694      nc - number of components per grid point
1695      col - number of colors needed in one direction for single component problem
1696   */
1697   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1698   col = 2 * s + 1;
1699 
1700   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1701   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1702   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1703 
1704   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1705 
1706   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1707 
1708   /* determine the matrix preallocation information */
1709   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1710   for (i = xs; i < xs + nx; i++) {
1711     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1712     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1713     for (j = ys; j < ys + ny; j++) {
1714       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1715       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1716       slot   = i - gxs + gnx * (j - gys);
1717 
1718       /* Find block columns in block row */
1719       cnt = 0;
1720       for (ii = istart; ii < iend + 1; ii++) {
1721         for (jj = jstart; jj < jend + 1; jj++) {
1722           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1723             cols[cnt++] = slot + ii + gnx * jj;
1724           }
1725         }
1726       }
1727       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1728     }
1729   }
1730   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1731   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1732   MatPreallocateEnd(dnz, onz);
1733 
1734   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1735 
1736   /*
1737     For each node in the grid: we get the neighbors in the local (on processor ordering
1738     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1739     PETSc ordering.
1740   */
1741   if (!da->prealloc_only) {
1742     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1743     for (i = xs; i < xs + nx; i++) {
1744       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1745       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1746       for (j = ys; j < ys + ny; j++) {
1747         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1748         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1749         slot   = i - gxs + gnx * (j - gys);
1750         cnt    = 0;
1751         for (ii = istart; ii < iend + 1; ii++) {
1752           for (jj = jstart; jj < jend + 1; jj++) {
1753             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1754               cols[cnt++] = slot + ii + gnx * jj;
1755             }
1756           }
1757         }
1758         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1759       }
1760     }
1761     PetscCall(PetscFree(values));
1762     /* do not copy values to GPU since they are all zero and not yet needed there */
1763     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1764     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1765     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1766     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1767     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1768   }
1769   PetscCall(PetscFree(cols));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)1773 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1774 {
1775   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1776   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1777   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1778   MPI_Comm               comm;
1779   PetscScalar           *values;
1780   DMBoundaryType         bx, by, bz;
1781   DMDAStencilType        st;
1782   ISLocalToGlobalMapping ltog;
1783 
1784   PetscFunctionBegin;
1785   /*
1786          nc - number of components per grid point
1787          col - number of colors needed in one direction for single component problem
1788   */
1789   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1790   col = 2 * s + 1;
1791 
1792   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1793   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1794   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1795 
1796   PetscCall(PetscMalloc1(col * col * col, &cols));
1797 
1798   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1799 
1800   /* determine the matrix preallocation information */
1801   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1802   for (i = xs; i < xs + nx; i++) {
1803     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1804     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1805     for (j = ys; j < ys + ny; j++) {
1806       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1807       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1808       for (k = zs; k < zs + nz; k++) {
1809         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1810         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1811 
1812         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1813 
1814         /* Find block columns in block row */
1815         cnt = 0;
1816         for (ii = istart; ii < iend + 1; ii++) {
1817           for (jj = jstart; jj < jend + 1; jj++) {
1818             for (kk = kstart; kk < kend + 1; kk++) {
1819               if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1820                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1821               }
1822             }
1823           }
1824         }
1825         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1826       }
1827     }
1828   }
1829   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1830   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1831   MatPreallocateEnd(dnz, onz);
1832 
1833   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1834 
1835   /*
1836     For each node in the grid: we get the neighbors in the local (on processor ordering
1837     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1838     PETSc ordering.
1839   */
1840   if (!da->prealloc_only) {
1841     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1842     for (i = xs; i < xs + nx; i++) {
1843       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1844       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1845       for (j = ys; j < ys + ny; j++) {
1846         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1847         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1848         for (k = zs; k < zs + nz; k++) {
1849           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1850           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1851 
1852           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1853 
1854           cnt = 0;
1855           for (ii = istart; ii < iend + 1; ii++) {
1856             for (jj = jstart; jj < jend + 1; jj++) {
1857               for (kk = kstart; kk < kend + 1; kk++) {
1858                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1859                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1860                 }
1861               }
1862             }
1863           }
1864           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1865         }
1866       }
1867     }
1868     PetscCall(PetscFree(values));
1869     /* do not copy values to GPU since they are all zero and not yet needed there */
1870     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1871     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1872     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1873     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1874     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1875   }
1876   PetscCall(PetscFree(cols));
1877   PetscFunctionReturn(PETSC_SUCCESS);
1878 }
1879 
1880 /*
1881   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1882   identify in the local ordering with periodic domain.
1883 */
L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt * row,PetscInt * cnt,PetscInt col[])1884 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1885 {
1886   PetscInt i, n;
1887 
1888   PetscFunctionBegin;
1889   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1890   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1891   for (i = 0, n = 0; i < *cnt; i++) {
1892     if (col[i] >= *row) col[n++] = col[i];
1893   }
1894   *cnt = n;
1895   PetscFunctionReturn(PETSC_SUCCESS);
1896 }
1897 
DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)1898 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1899 {
1900   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1901   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1902   PetscInt               istart, iend, jstart, jend, ii, jj;
1903   MPI_Comm               comm;
1904   PetscScalar           *values;
1905   DMBoundaryType         bx, by;
1906   DMDAStencilType        st;
1907   ISLocalToGlobalMapping ltog;
1908 
1909   PetscFunctionBegin;
1910   /*
1911      nc - number of components per grid point
1912      col - number of colors needed in one direction for single component problem
1913   */
1914   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1915   col = 2 * s + 1;
1916 
1917   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1918   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1919   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1920 
1921   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1922 
1923   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1924 
1925   /* determine the matrix preallocation information */
1926   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1927   for (i = xs; i < xs + nx; i++) {
1928     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1929     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1930     for (j = ys; j < ys + ny; j++) {
1931       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1932       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1933       slot   = i - gxs + gnx * (j - gys);
1934 
1935       /* Find block columns in block row */
1936       cnt = 0;
1937       for (ii = istart; ii < iend + 1; ii++) {
1938         for (jj = jstart; jj < jend + 1; jj++) {
1939           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1940         }
1941       }
1942       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1943       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1944     }
1945   }
1946   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1947   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1948   MatPreallocateEnd(dnz, onz);
1949 
1950   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1951 
1952   /*
1953     For each node in the grid: we get the neighbors in the local (on processor ordering
1954     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1955     PETSc ordering.
1956   */
1957   if (!da->prealloc_only) {
1958     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1959     for (i = xs; i < xs + nx; i++) {
1960       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1961       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1962       for (j = ys; j < ys + ny; j++) {
1963         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1964         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1965         slot   = i - gxs + gnx * (j - gys);
1966 
1967         /* Find block columns in block row */
1968         cnt = 0;
1969         for (ii = istart; ii < iend + 1; ii++) {
1970           for (jj = jstart; jj < jend + 1; jj++) {
1971             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1972           }
1973         }
1974         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1975         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1976       }
1977     }
1978     PetscCall(PetscFree(values));
1979     /* do not copy values to GPU since they are all zero and not yet needed there */
1980     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1981     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1982     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1983     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1984     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1985   }
1986   PetscCall(PetscFree(cols));
1987   PetscFunctionReturn(PETSC_SUCCESS);
1988 }
1989 
DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)1990 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1991 {
1992   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1993   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1994   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1995   MPI_Comm               comm;
1996   PetscScalar           *values;
1997   DMBoundaryType         bx, by, bz;
1998   DMDAStencilType        st;
1999   ISLocalToGlobalMapping ltog;
2000 
2001   PetscFunctionBegin;
2002   /*
2003      nc - number of components per grid point
2004      col - number of colors needed in one direction for single component problem
2005   */
2006   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2007   col = 2 * s + 1;
2008 
2009   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2010   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2011   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2012 
2013   /* create the matrix */
2014   PetscCall(PetscMalloc1(col * col * col, &cols));
2015 
2016   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2017 
2018   /* determine the matrix preallocation information */
2019   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2020   for (i = xs; i < xs + nx; i++) {
2021     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2022     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2023     for (j = ys; j < ys + ny; j++) {
2024       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2025       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2026       for (k = zs; k < zs + nz; k++) {
2027         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2028         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2029 
2030         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2031 
2032         /* Find block columns in block row */
2033         cnt = 0;
2034         for (ii = istart; ii < iend + 1; ii++) {
2035           for (jj = jstart; jj < jend + 1; jj++) {
2036             for (kk = kstart; kk < kend + 1; kk++) {
2037               if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2038             }
2039           }
2040         }
2041         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2042         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2043       }
2044     }
2045   }
2046   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2047   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2048   MatPreallocateEnd(dnz, onz);
2049 
2050   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2051 
2052   /*
2053     For each node in the grid: we get the neighbors in the local (on processor ordering
2054     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2055     PETSc ordering.
2056   */
2057   if (!da->prealloc_only) {
2058     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2059     for (i = xs; i < xs + nx; i++) {
2060       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2061       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2062       for (j = ys; j < ys + ny; j++) {
2063         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2064         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2065         for (k = zs; k < zs + nz; k++) {
2066           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2067           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2068 
2069           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2070 
2071           cnt = 0;
2072           for (ii = istart; ii < iend + 1; ii++) {
2073             for (jj = jstart; jj < jend + 1; jj++) {
2074               for (kk = kstart; kk < kend + 1; kk++) {
2075                 if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2076               }
2077             }
2078           }
2079           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2080           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2081         }
2082       }
2083     }
2084     PetscCall(PetscFree(values));
2085     /* do not copy values to GPU since they are all zero and not yet needed there */
2086     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2087     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2088     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2089     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2090     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2091   }
2092   PetscCall(PetscFree(cols));
2093   PetscFunctionReturn(PETSC_SUCCESS);
2094 }
2095 
DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)2096 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2097 {
2098   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2099   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2100   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2101   DM_DA                 *dd = (DM_DA *)da->data;
2102   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2103   MPI_Comm               comm;
2104   PetscScalar           *values;
2105   DMBoundaryType         bx, by, bz;
2106   ISLocalToGlobalMapping ltog;
2107   DMDAStencilType        st;
2108   PetscBool              removedups = PETSC_FALSE;
2109 
2110   PetscFunctionBegin;
2111   /*
2112          nc - number of components per grid point
2113          col - number of colors needed in one direction for single component problem
2114   */
2115   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2116   col = 2 * s + 1;
2117 
2118   /*
2119        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2120        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2121   */
2122   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2123   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2124   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2125 
2126   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2127   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2128   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2129 
2130   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2131   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2132 
2133   /* determine the matrix preallocation information */
2134   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2135 
2136   PetscCall(MatSetBlockSize(J, nc));
2137   for (i = xs; i < xs + nx; i++) {
2138     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2139     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2140     for (j = ys; j < ys + ny; j++) {
2141       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2142       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2143       for (k = zs; k < zs + nz; k++) {
2144         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2145         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2146 
2147         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2148 
2149         for (l = 0; l < nc; l++) {
2150           cnt = 0;
2151           for (ii = istart; ii < iend + 1; ii++) {
2152             for (jj = jstart; jj < jend + 1; jj++) {
2153               for (kk = kstart; kk < kend + 1; kk++) {
2154                 if (ii || jj || kk) {
2155                   if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2156                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2157                   }
2158                 } else {
2159                   if (dfill) {
2160                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2161                   } else {
2162                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2163                   }
2164                 }
2165               }
2166             }
2167           }
2168           row    = l + nc * (slot);
2169           maxcnt = PetscMax(maxcnt, cnt);
2170           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2171           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2172         }
2173       }
2174     }
2175   }
2176   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2177   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2178   MatPreallocateEnd(dnz, onz);
2179   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2180 
2181   /*
2182     For each node in the grid: we get the neighbors in the local (on processor ordering
2183     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2184     PETSc ordering.
2185   */
2186   if (!da->prealloc_only) {
2187     PetscCall(PetscCalloc1(maxcnt, &values));
2188     for (i = xs; i < xs + nx; i++) {
2189       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2190       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2191       for (j = ys; j < ys + ny; j++) {
2192         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2193         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2194         for (k = zs; k < zs + nz; k++) {
2195           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2196           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2197 
2198           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2199 
2200           for (l = 0; l < nc; l++) {
2201             cnt = 0;
2202             for (ii = istart; ii < iend + 1; ii++) {
2203               for (jj = jstart; jj < jend + 1; jj++) {
2204                 for (kk = kstart; kk < kend + 1; kk++) {
2205                   if (ii || jj || kk) {
2206                     if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2207                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2208                     }
2209                   } else {
2210                     if (dfill) {
2211                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2212                     } else {
2213                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2214                     }
2215                   }
2216                 }
2217               }
2218             }
2219             row = l + nc * (slot);
2220             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2221           }
2222         }
2223       }
2224     }
2225     PetscCall(PetscFree(values));
2226     /* do not copy values to GPU since they are all zero and not yet needed there */
2227     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2228     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2229     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2230     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2231     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2232   }
2233   PetscCall(PetscFree(cols));
2234   PetscFunctionReturn(PETSC_SUCCESS);
2235 }
2236