xref: /petsc/src/snes/tutorials/ex56.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 /* Portions of this code are under:
2    Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved.
3 */
4 static char help[] = "3D, tensor hexahedra (Q1-K), displacement finite element formulation\n\
5 of linear elasticity.  E=1.0, nu=1/3.\n\
6 Unit cube domain with Dirichlet boundary\n\n";
7 
8 #include <petscdmplex.h>
9 #include <petscsnes.h>
10 #include <petscds.h>
11 #include <petscdmforest.h>
12 
13 static PetscReal s_soft_alpha = 1.e-3;
14 static PetscReal s_mu         = 0.4;
15 static PetscReal s_lambda     = 0.4;
16 
17 static void f0_bd_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
18 {
19   f0[0] = 1;     /* x direction pull */
20   f0[1] = -x[2]; /* add a twist around x-axis */
21   f0[2] = x[1];
22 }
23 
24 static void f1_bd_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
25 {
26   const PetscInt Ncomp = dim;
27   PetscInt       comp, d;
28   for (comp = 0; comp < Ncomp; ++comp) {
29     for (d = 0; d < dim; ++d) f1[comp * dim + d] = 0.0;
30   }
31 }
32 
33 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
34 static void f1_u_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
35 {
36   PetscReal trace, mu = s_mu, lambda = s_lambda, rad;
37   PetscInt  i, j;
38   for (i = 0, rad = 0.; i < dim; i++) {
39     PetscReal t = x[i];
40     rad += t * t;
41   }
42   rad = PetscSqrtReal(rad);
43   if (rad > 0.25) {
44     mu *= s_soft_alpha;
45     lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
46   }
47   for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
48   for (i = 0; i < dim; ++i) {
49     for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
50     f1[i * dim + i] += lambda * trace;
51   }
52 }
53 
54 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
55 static void f1_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
56 {
57   PetscReal trace, mu = s_mu, lambda = s_lambda;
58   PetscInt  i, j;
59   for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
60   for (i = 0; i < dim; ++i) {
61     for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
62     f1[i * dim + i] += lambda * trace;
63   }
64 }
65 
66 /* 3D elasticity */
67 #define IDX(ii, jj, kk, ll) (27 * ii + 9 * jj + 3 * kk + ll)
68 
69 void g3_uu_3d_private(PetscScalar g3[], const PetscReal mu, const PetscReal lambda)
70 {
71   if (1) {
72     g3[0] += lambda;
73     g3[0] += mu;
74     g3[0] += mu;
75     g3[4] += lambda;
76     g3[8] += lambda;
77     g3[10] += mu;
78     g3[12] += mu;
79     g3[20] += mu;
80     g3[24] += mu;
81     g3[28] += mu;
82     g3[30] += mu;
83     g3[36] += lambda;
84     g3[40] += lambda;
85     g3[40] += mu;
86     g3[40] += mu;
87     g3[44] += lambda;
88     g3[50] += mu;
89     g3[52] += mu;
90     g3[56] += mu;
91     g3[60] += mu;
92     g3[68] += mu;
93     g3[70] += mu;
94     g3[72] += lambda;
95     g3[76] += lambda;
96     g3[80] += lambda;
97     g3[80] += mu;
98     g3[80] += mu;
99   } else {
100     int        i, j, k, l;
101     static int cc = -1;
102     cc++;
103     for (i = 0; i < 3; ++i) {
104       for (j = 0; j < 3; ++j) {
105         for (k = 0; k < 3; ++k) {
106           for (l = 0; l < 3; ++l) {
107             if (k == l && i == j) g3[IDX(i, j, k, l)] += lambda;
108             if (i == k && j == l) g3[IDX(i, j, k, l)] += mu;
109             if (i == l && j == k) g3[IDX(i, j, k, l)] += mu;
110             if (k == l && i == j && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += lambda;\n", IDX(i, j, k, l));
111             if (i == k && j == l && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
112             if (i == l && j == k && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
113           }
114         }
115       }
116     }
117   }
118 }
119 
120 static void g3_uu_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
121 {
122   PetscReal mu = s_mu, lambda = s_lambda, rad;
123   PetscInt  i;
124   for (i = 0, rad = 0.; i < dim; i++) {
125     PetscReal t = x[i];
126     rad += t * t;
127   }
128   rad = PetscSqrtReal(rad);
129   if (rad > 0.25) {
130     mu *= s_soft_alpha;
131     lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
132   }
133   g3_uu_3d_private(g3, mu, lambda);
134 }
135 
136 static void g3_uu_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
137 {
138   g3_uu_3d_private(g3, s_mu, s_lambda);
139 }
140 
141 static void f0_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
142 {
143   const PetscInt Ncomp = dim;
144   PetscInt       comp;
145 
146   for (comp = 0; comp < Ncomp; ++comp) f0[comp] = 0.0;
147 }
148 
149 /* PI_i (x_i^4 - x_i^2) */
150 static void f0_u_x4(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
151 {
152   const PetscInt Ncomp = dim;
153   PetscInt       comp, i;
154 
155   for (comp = 0; comp < Ncomp; ++comp) {
156     f0[comp] = 1e5;
157     for (i = 0; i < Ncomp; ++i) { f0[comp] *= /* (comp+1)* */ (x[i] * x[i] * x[i] * x[i] - x[i] * x[i]); /* assumes (0,1]^D domain */ }
158   }
159 }
160 
161 PetscErrorCode zero(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, void *ctx)
162 {
163   const PetscInt Ncomp = dim;
164   PetscInt       comp;
165 
166   for (comp = 0; comp < Ncomp; ++comp) u[comp] = 0;
167   return 0;
168 }
169 
170 int main(int argc, char **args)
171 {
172   Mat         Amat;
173   SNES        snes;
174   KSP         ksp;
175   MPI_Comm    comm;
176   PetscMPIInt rank;
177 #if defined(PETSC_USE_LOG)
178   PetscLogStage stage[17];
179 #endif
180   PetscBool         test_nonzero_cols = PETSC_FALSE, use_nearnullspace = PETSC_TRUE, attach_nearnullspace = PETSC_FALSE;
181   Vec               xx, bb;
182   PetscInt          iter, i, N, dim = 3, cells[3] = {1, 1, 1}, max_conv_its, local_sizes[7], run_type = 1;
183   DM                dm, distdm, basedm;
184   PetscBool         flg;
185   char              convType[256];
186   PetscReal         Lx, mdisp[10], err[10];
187   const char *const options[10] = {"-ex56_dm_refine 0", "-ex56_dm_refine 1", "-ex56_dm_refine 2", "-ex56_dm_refine 3", "-ex56_dm_refine 4", "-ex56_dm_refine 5", "-ex56_dm_refine 6", "-ex56_dm_refine 7", "-ex56_dm_refine 8", "-ex56_dm_refine 9"};
188   PetscFunctionBeginUser;
189   PetscFunctionBeginUser;
190   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
191   comm = PETSC_COMM_WORLD;
192   PetscCallMPI(MPI_Comm_rank(comm, &rank));
193   /* options */
194   PetscOptionsBegin(comm, NULL, "3D bilinear Q1 elasticity options", "");
195   {
196     i = 3;
197     PetscCall(PetscOptionsIntArray("-cells", "Number of (flux tube) processor in each dimension", "ex56.c", cells, &i, NULL));
198 
199     Lx           = 1.; /* or ne for rod */
200     max_conv_its = 3;
201     PetscCall(PetscOptionsInt("-max_conv_its", "Number of iterations in convergence study", "", max_conv_its, &max_conv_its, NULL));
202     PetscCheck(max_conv_its > 0 && max_conv_its < 7, PETSC_COMM_WORLD, PETSC_ERR_USER, "Bad number of iterations for convergence test (%" PetscInt_FMT ")", max_conv_its);
203     PetscCall(PetscOptionsReal("-lx", "Length of domain", "", Lx, &Lx, NULL));
204     PetscCall(PetscOptionsReal("-alpha", "material coefficient inside circle", "", s_soft_alpha, &s_soft_alpha, NULL));
205     PetscCall(PetscOptionsBool("-test_nonzero_cols", "nonzero test", "", test_nonzero_cols, &test_nonzero_cols, NULL));
206     PetscCall(PetscOptionsBool("-use_mat_nearnullspace", "MatNearNullSpace API test", "", use_nearnullspace, &use_nearnullspace, NULL));
207     PetscCall(PetscOptionsBool("-attach_mat_nearnullspace", "MatNearNullSpace API test (via MatSetNearNullSpace)", "", attach_nearnullspace, &attach_nearnullspace, NULL));
208     PetscCall(PetscOptionsInt("-run_type", "0: twisting load on cantalever, 1: 3rd order accurate convergence test", "", run_type, &run_type, NULL));
209   }
210   PetscOptionsEnd();
211   PetscCall(PetscLogStageRegister("Mesh Setup", &stage[16]));
212   for (iter = 0; iter < max_conv_its; iter++) {
213     char str[] = "Solve 0";
214     str[6] += iter;
215     PetscCall(PetscLogStageRegister(str, &stage[iter]));
216   }
217   /* create DM, Plex calls DMSetup */
218   PetscCall(PetscLogStagePush(stage[16]));
219   PetscCall(DMPlexCreateBoxMesh(comm, dim, PETSC_FALSE, cells, NULL, NULL, NULL, PETSC_TRUE, &dm));
220   {
221     DMLabel label;
222     IS      is;
223     PetscCall(DMCreateLabel(dm, "boundary"));
224     PetscCall(DMGetLabel(dm, "boundary", &label));
225     PetscCall(DMPlexMarkBoundaryFaces(dm, 1, label));
226     if (run_type == 0) {
227       PetscCall(DMGetStratumIS(dm, "boundary", 1, &is));
228       PetscCall(DMCreateLabel(dm, "Faces"));
229       if (is) {
230         PetscInt        d, f, Nf;
231         const PetscInt *faces;
232         PetscInt        csize;
233         PetscSection    cs;
234         Vec             coordinates;
235         DM              cdm;
236         PetscCall(ISGetLocalSize(is, &Nf));
237         PetscCall(ISGetIndices(is, &faces));
238         PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
239         PetscCall(DMGetCoordinateDM(dm, &cdm));
240         PetscCall(DMGetLocalSection(cdm, &cs));
241         /* Check for each boundary face if any component of its centroid is either 0.0 or 1.0 */
242         for (f = 0; f < Nf; ++f) {
243           PetscReal    faceCoord;
244           PetscInt     b, v;
245           PetscScalar *coords = NULL;
246           PetscInt     Nv;
247           PetscCall(DMPlexVecGetClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
248           Nv = csize / dim; /* Calculate mean coordinate vector */
249           for (d = 0; d < dim; ++d) {
250             faceCoord = 0.0;
251             for (v = 0; v < Nv; ++v) faceCoord += PetscRealPart(coords[v * dim + d]);
252             faceCoord /= Nv;
253             for (b = 0; b < 2; ++b) {
254               if (PetscAbs(faceCoord - b) < PETSC_SMALL) { /* domain have not been set yet, still [0,1]^3 */
255                 PetscCall(DMSetLabelValue(dm, "Faces", faces[f], d * 2 + b + 1));
256               }
257             }
258           }
259           PetscCall(DMPlexVecRestoreClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
260         }
261         PetscCall(ISRestoreIndices(is, &faces));
262       }
263       PetscCall(ISDestroy(&is));
264       PetscCall(DMGetLabel(dm, "Faces", &label));
265       PetscCall(DMPlexLabelComplete(dm, label));
266     }
267   }
268   {
269     PetscInt     dimEmbed, i;
270     PetscInt     nCoords;
271     PetscScalar *coords, bounds[] = {
272                            0, 1, -.5, .5, -.5, .5,
273                          }; /* x_min,x_max,y_min,y_max */
274     Vec coordinates;
275     bounds[1] = Lx;
276     if (run_type == 1) {
277       for (i = 0; i < 2 * dim; i++) bounds[i] = (i % 2) ? 1 : 0;
278     }
279     PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
280     PetscCall(DMGetCoordinateDim(dm, &dimEmbed));
281     PetscCheck(dimEmbed == dim, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "dimEmbed != dim %" PetscInt_FMT, dimEmbed);
282     PetscCall(VecGetLocalSize(coordinates, &nCoords));
283     PetscCheck((nCoords % dimEmbed) == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Coordinate vector the wrong size");
284     PetscCall(VecGetArray(coordinates, &coords));
285     for (i = 0; i < nCoords; i += dimEmbed) {
286       PetscInt     j;
287       PetscScalar *coord = &coords[i];
288       for (j = 0; j < dimEmbed; j++) coord[j] = bounds[2 * j] + coord[j] * (bounds[2 * j + 1] - bounds[2 * j]);
289     }
290     PetscCall(VecRestoreArray(coordinates, &coords));
291     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
292   }
293 
294   /* convert to p4est, and distribute */
295   PetscOptionsBegin(comm, "", "Mesh conversion options", "DMPLEX");
296   PetscCall(PetscOptionsFList("-dm_type", "Convert DMPlex to another format (should not be Plex!)", "ex56.c", DMList, DMPLEX, convType, 256, &flg));
297   PetscOptionsEnd();
298   if (flg) {
299     DM newdm;
300     PetscCall(DMConvert(dm, convType, &newdm));
301     if (newdm) {
302       const char *prefix;
303       PetscBool   isForest;
304       PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix));
305       PetscCall(PetscObjectSetOptionsPrefix((PetscObject)newdm, prefix));
306       PetscCall(DMIsForest(newdm, &isForest));
307       PetscCheck(isForest, PETSC_COMM_WORLD, PETSC_ERR_USER, "Converted to non Forest?");
308       PetscCall(DMDestroy(&dm));
309       dm = newdm;
310     } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Convert failed?");
311   } else {
312     PetscPartitioner part;
313     /* Plex Distribute mesh over processes */
314     PetscCall(DMPlexGetPartitioner(dm, &part));
315     PetscCall(PetscPartitionerSetFromOptions(part));
316     PetscCall(DMPlexDistribute(dm, 0, NULL, &distdm));
317     if (distdm) {
318       const char *prefix;
319       PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix));
320       PetscCall(PetscObjectSetOptionsPrefix((PetscObject)distdm, prefix));
321       PetscCall(DMDestroy(&dm));
322       dm = distdm;
323     }
324   }
325   PetscCall(PetscLogStagePop());
326   basedm = dm;
327   dm     = NULL;
328 
329   for (iter = 0; iter < max_conv_its; iter++) {
330     PetscCall(PetscLogStagePush(stage[16]));
331     /* make new DM */
332     PetscCall(DMClone(basedm, &dm));
333     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)dm, "ex56_"));
334     PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh"));
335     if (max_conv_its > 1) {
336       /* If max_conv_its == 1, then we are not doing a convergence study. */
337       PetscCall(PetscOptionsInsertString(NULL, options[iter]));
338     }
339     PetscCall(DMSetFromOptions(dm)); /* refinement done here in Plex, p4est */
340     /* snes */
341     PetscCall(SNESCreate(comm, &snes));
342     PetscCall(SNESSetDM(snes, dm));
343     /* fem */
344     {
345       const PetscInt Ncomp        = dim;
346       const PetscInt components[] = {0, 1, 2};
347       const PetscInt Nfid = 1, Npid = 1;
348       const PetscInt fid[] = {1}; /* The fixed faces (x=0) */
349       const PetscInt pid[] = {2}; /* The faces with loading (x=L_x) */
350       PetscFE        fe;
351       PetscDS        prob;
352       DMLabel        label;
353       DM             cdm = dm;
354 
355       PetscCall(PetscFECreateDefault(PetscObjectComm((PetscObject)dm), dim, dim, PETSC_FALSE, NULL, PETSC_DECIDE, &fe)); /* elasticity */
356       PetscCall(PetscObjectSetName((PetscObject)fe, "deformation"));
357       /* FEM prob */
358       PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
359       PetscCall(DMCreateDS(dm));
360       PetscCall(DMGetDS(dm, &prob));
361       /* setup problem */
362       if (run_type == 1) {
363         PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d));
364         PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_3d));
365       } else {
366         PetscWeakForm wf;
367         PetscInt      bd, i;
368 
369         PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d_alpha));
370         PetscCall(PetscDSSetResidual(prob, 0, f0_u, f1_u_3d_alpha));
371 
372         PetscCall(DMGetLabel(dm, "Faces", &label));
373         PetscCall(DMAddBoundary(dm, DM_BC_NATURAL, "traction", label, Npid, pid, 0, Ncomp, components, NULL, NULL, NULL, &bd));
374         PetscCall(PetscDSGetBoundary(prob, bd, &wf, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
375         for (i = 0; i < Npid; ++i) PetscCall(PetscWeakFormSetIndexBdResidual(wf, label, pid[i], 0, 0, 0, f0_bd_u_3d, 0, f1_bd_u));
376       }
377       /* bcs */
378       if (run_type == 1) {
379         PetscInt id = 1;
380         PetscCall(DMGetLabel(dm, "boundary", &label));
381         PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))zero, NULL, NULL, NULL));
382       } else {
383         PetscCall(DMGetLabel(dm, "Faces", &label));
384         PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "fixed", label, Nfid, fid, 0, Ncomp, components, (void (*)(void))zero, NULL, NULL, NULL));
385       }
386       while (cdm) {
387         PetscCall(DMCopyDisc(dm, cdm));
388         PetscCall(DMGetCoarseDM(cdm, &cdm));
389       }
390       PetscCall(PetscFEDestroy(&fe));
391     }
392     /* vecs & mat */
393     PetscCall(DMCreateGlobalVector(dm, &xx));
394     PetscCall(VecDuplicate(xx, &bb));
395     PetscCall(PetscObjectSetName((PetscObject)bb, "b"));
396     PetscCall(PetscObjectSetName((PetscObject)xx, "u"));
397     PetscCall(DMCreateMatrix(dm, &Amat));
398     PetscCall(MatSetOption(Amat, MAT_SYMMETRIC, PETSC_TRUE));        /* Some matrix kernels can take advantage of symmetry if we set this. */
399     PetscCall(MatSetOption(Amat, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); /* Inform PETSc that Amat is always symmetric, so info set above isn't lost. */
400     PetscCall(MatSetBlockSize(Amat, 3));
401     PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
402     PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
403     PetscCall(VecGetSize(bb, &N));
404     local_sizes[iter] = N;
405     PetscCall(PetscInfo(snes, "%" PetscInt_FMT " global equations, %" PetscInt_FMT " vertices\n", N, N / dim));
406     if ((use_nearnullspace || attach_nearnullspace) && N / dim > 1) {
407       /* Set up the near null space (a.k.a. rigid body modes) that will be used by the multigrid preconditioner */
408       DM           subdm;
409       MatNullSpace nearNullSpace;
410       PetscInt     fields = 0;
411       PetscObject  deformation;
412       PetscCall(DMCreateSubDM(dm, 1, &fields, NULL, &subdm));
413       PetscCall(DMPlexCreateRigidBody(subdm, 0, &nearNullSpace));
414       PetscCall(DMGetField(dm, 0, NULL, &deformation));
415       PetscCall(PetscObjectCompose(deformation, "nearnullspace", (PetscObject)nearNullSpace));
416       PetscCall(DMDestroy(&subdm));
417       if (attach_nearnullspace) PetscCall(MatSetNearNullSpace(Amat, nearNullSpace));
418       PetscCall(MatNullSpaceDestroy(&nearNullSpace)); /* created by DM and destroyed by Mat */
419     }
420     PetscCall(DMPlexSetSNESLocalFEM(dm, NULL, NULL, NULL));
421     PetscCall(SNESSetJacobian(snes, Amat, Amat, NULL, NULL));
422     PetscCall(SNESSetFromOptions(snes));
423     PetscCall(DMSetUp(dm));
424     PetscCall(PetscLogStagePop());
425     PetscCall(PetscLogStagePush(stage[16]));
426     /* ksp */
427     PetscCall(SNESGetKSP(snes, &ksp));
428     PetscCall(KSPSetComputeSingularValues(ksp, PETSC_TRUE));
429     /* test BCs */
430     PetscCall(VecZeroEntries(xx));
431     if (test_nonzero_cols) {
432       if (rank == 0) PetscCall(VecSetValue(xx, 0, 1.0, INSERT_VALUES));
433       PetscCall(VecAssemblyBegin(xx));
434       PetscCall(VecAssemblyEnd(xx));
435     }
436     PetscCall(VecZeroEntries(bb));
437     PetscCall(VecGetSize(bb, &i));
438     local_sizes[iter] = i;
439     PetscCall(PetscInfo(snes, "%" PetscInt_FMT " equations in vector, %" PetscInt_FMT " vertices\n", i, i / dim));
440     PetscCall(PetscLogStagePop());
441     /* solve */
442     PetscCall(PetscLogStagePush(stage[iter]));
443     PetscCall(SNESSolve(snes, bb, xx));
444     PetscCall(PetscLogStagePop());
445     PetscCall(VecNorm(xx, NORM_INFINITY, &mdisp[iter]));
446     PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
447     {
448       PetscViewer       viewer = NULL;
449       PetscViewerFormat fmt;
450       PetscCall(PetscOptionsGetViewer(comm, NULL, "ex56_", "-vec_view", &viewer, &fmt, &flg));
451       if (flg) {
452         PetscCall(PetscViewerPushFormat(viewer, fmt));
453         PetscCall(VecView(xx, viewer));
454         PetscCall(VecView(bb, viewer));
455         PetscCall(PetscViewerPopFormat(viewer));
456       }
457       PetscCall(PetscViewerDestroy(&viewer));
458     }
459     /* Free work space */
460     PetscCall(DMDestroy(&dm));
461     PetscCall(SNESDestroy(&snes));
462     PetscCall(VecDestroy(&xx));
463     PetscCall(VecDestroy(&bb));
464     PetscCall(MatDestroy(&Amat));
465   }
466   PetscCall(DMDestroy(&basedm));
467   if (run_type == 1) err[0] = 59.975208 - mdisp[0]; /* error with what I think is the exact solution */
468   else err[0] = 171.038 - mdisp[0];
469   for (iter = 1; iter < max_conv_its; iter++) {
470     if (run_type == 1) err[iter] = 59.975208 - mdisp[iter];
471     else err[iter] = 171.038 - mdisp[iter];
472     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %" PetscInt_FMT ") N=%12" PetscInt_FMT ", max displ=%9.7e, disp diff=%9.2e, error=%4.3e, rate=%3.2g\n", rank, iter, local_sizes[iter], (double)mdisp[iter], (double)(mdisp[iter] - mdisp[iter - 1]), (double)err[iter], (double)(PetscLogReal(err[iter - 1] / err[iter]) / PetscLogReal(2.))));
473   }
474 
475   PetscCall(PetscFinalize());
476   return 0;
477 }
478 
479 /*TEST
480 
481   test:
482     suffix: 0
483     nsize: 4
484     requires: !single
485     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 3 -snes_max_it 1 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -ksp_converged_reason -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -ex56_dm_view -snes_lag_jacobian -2 -snes_type ksponly -use_gpu_aware_mpi true
486     timeoutfactor: 2
487 
488   # HYPRE PtAP broken with complex numbers
489   test:
490     suffix: hypre
491     requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
492     nsize: 4
493     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -pc_type hypre -pc_hypre_type boomeramg -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -ksp_converged_reason -use_mat_nearnullspace true -petscpartitioner_type simple
494 
495   test:
496     suffix: ml
497     requires: ml !single
498     nsize: 4
499     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type ml -mg_levels_ksp_type chebyshev -mg_levels_ksp_max_it 3 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type sor -petscpartitioner_type simple -use_mat_nearnullspace
500 
501   test:
502     suffix: hpddm
503     requires: hpddm slepc !single defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
504     nsize: 4
505     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fgmres -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type hpddm -petscpartitioner_type simple -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 6 -pc_hpddm_coarse_p 1 -pc_hpddm_coarse_pc_type svd
506 
507   test:
508     suffix: repart
509     nsize: 4
510     requires: parmetis !single
511     args: -cells 8,2,2 -max_conv_its 1 -petscspace_degree 2 -snes_max_it 4 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-2 -ksp_norm_type unpreconditioned -snes_rtol 1.e-3 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -pc_gamg_mat_partitioning_type parmetis -pc_gamg_repartition true -snes_converged_reason -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 10 -ksp_converged_reason -snes_converged_reason -pc_gamg_reuse_interpolation true
512 
513   test:
514     suffix: bddc
515     nsize: 4
516     requires: !single
517     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type {{sbaij baij aij}} -pc_type bddc
518 
519   testset:
520     nsize: 4
521     requires: !single
522     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-10 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type aij -pc_type bddc -attach_mat_nearnullspace {{0 1}separate output}
523     test:
524       suffix: bddc_approx_gamg
525       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
526     # HYPRE PtAP broken with complex numbers
527     test:
528       requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
529       suffix: bddc_approx_hypre
530       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop -prefix_push pc_bddc_neumann_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop
531     test:
532       requires: ml
533       suffix: bddc_approx_ml
534       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
535 
536   test:
537     suffix: fetidp
538     nsize: 4
539     requires: !single
540     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type {{sbaij baij aij}}
541 
542   test:
543     suffix: bddc_elast
544     nsize: 4
545     requires: !single
546     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type sbaij -pc_type bddc -pc_bddc_monolithic -attach_mat_nearnullspace
547 
548   test:
549     suffix: fetidp_elast
550     nsize: 4
551     requires: !single
552     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type sbaij -fetidp_bddc_pc_bddc_monolithic -attach_mat_nearnullspace
553 
554   test:
555     suffix: gdsw
556     nsize: 4
557     requires: !single
558     args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -attach_mat_nearnullspace \
559           -pc_type mg -pc_mg_galerkin -pc_mg_adapt_interp_coarse_space gdsw -pc_mg_levels 2 -mg_levels_pc_type bjacobi -mg_levels_sub_pc_type icc
560 
561   testset:
562     nsize: 4
563     requires: !single
564     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -ksp_monitor_short -ksp_converged_reason -snes_converged_reason -snes_monitor_short -ex56_dm_view -petscpartitioner_type simple -pc_gamg_process_eq_limit 20
565     output_file: output/ex56_cuda.out
566 
567     test:
568       suffix: cuda
569       requires: cuda
570       args: -ex56_dm_mat_type aijcusparse -ex56_dm_vec_type cuda
571 
572     test:
573       suffix: hip
574       requires: hip
575       args: -ex56_dm_mat_type aijhipsparse -ex56_dm_vec_type hip
576 
577     test:
578       suffix: viennacl
579       requires: viennacl
580       args: -ex56_dm_mat_type aijviennacl -ex56_dm_vec_type viennacl
581 
582     test:
583       suffix: kokkos
584       requires: kokkos_kernels
585       args: -ex56_dm_mat_type aijkokkos -ex56_dm_vec_type kokkos
  # Don't run AIJMKL cases with complex scalars because of convergence issues.
587   # Note that we need to test both single and multiple MPI rank cases, because these use different sparse MKL routines to implement the PtAP operation.
588   test:
589     suffix: seqaijmkl
590     nsize: 1
591     requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
592     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -ex56_dm_view -run_type 1 -mat_seqaij_type seqaijmkl
593     timeoutfactor: 2
594 
595   test:
596     suffix: mpiaijmkl
597     nsize: 2
598     requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
599     args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -ex56_dm_view -run_type 1 -mat_seqaij_type seqaijmkl
600     timeoutfactor: 2
601 
602 TEST*/
603