xref: /petsc/src/snes/tutorials/ex56.c (revision fbf9dbe564678ed6eff1806adbc4c4f01b9743f4)
1 /* Portions of this code are under:
2    Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved.
3 */
4 static char help[] = "3D tensor hexahedra & 3D Laplacian displacement finite element formulation\n\
5 of linear elasticity.  E=1.0, nu=1/3.\n\
6 Unit cube domain with Dirichlet boundary\n\n";
7 
8 #include <petscdmplex.h>
9 #include <petscsnes.h>
10 #include <petscds.h>
11 #include <petscdmforest.h>
12 
13 static PetscReal s_soft_alpha = 0.01;
14 static PetscReal s_mu         = 0.4;
15 static PetscReal s_lambda     = 0.4;
16 
17 static void f0_bd_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
18 {
19   f0[0] = 1;     /* x direction pull */
20   f0[1] = -x[2]; /* add a twist around x-axis */
21   f0[2] = x[1];
22 }
23 
24 static void f1_bd_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
25 {
26   const PetscInt Ncomp = dim;
27   PetscInt       comp, d;
28   for (comp = 0; comp < Ncomp; ++comp) {
29     for (d = 0; d < dim; ++d) f1[comp * dim + d] = 0.0;
30   }
31 }
32 
33 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
34 static void f1_u_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
35 {
36   PetscReal trace, mu = s_mu, lambda = s_lambda, rad;
37   PetscInt  i, j;
38   for (i = 0, rad = 0.; i < dim; i++) {
39     PetscReal t = x[i];
40     rad += t * t;
41   }
42   rad = PetscSqrtReal(rad);
43   if (rad > 0.25) {
44     mu *= s_soft_alpha;
45     lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
46   }
47   for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
48   for (i = 0; i < dim; ++i) {
49     for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
50     f1[i * dim + i] += lambda * trace;
51   }
52 }
53 
54 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */
55 static void f1_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
56 {
57   PetscReal trace, mu = s_mu, lambda = s_lambda;
58   PetscInt  i, j;
59   for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]);
60   for (i = 0; i < dim; ++i) {
61     for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]);
62     f1[i * dim + i] += lambda * trace;
63   }
64 }
65 
66 static void f1_u_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
67 {
68   PetscInt d;
69   for (d = 0; d < dim; ++d) f1[d] = u_x[d];
70 }
71 
72 /* 3D elasticity */
73 #define IDX(ii, jj, kk, ll) (27 * ii + 9 * jj + 3 * kk + ll)
74 
75 void g3_uu_3d_private(PetscScalar g3[], const PetscReal mu, const PetscReal lambda)
76 {
77   if (1) {
78     g3[0] += lambda;
79     g3[0] += mu;
80     g3[0] += mu;
81     g3[4] += lambda;
82     g3[8] += lambda;
83     g3[10] += mu;
84     g3[12] += mu;
85     g3[20] += mu;
86     g3[24] += mu;
87     g3[28] += mu;
88     g3[30] += mu;
89     g3[36] += lambda;
90     g3[40] += lambda;
91     g3[40] += mu;
92     g3[40] += mu;
93     g3[44] += lambda;
94     g3[50] += mu;
95     g3[52] += mu;
96     g3[56] += mu;
97     g3[60] += mu;
98     g3[68] += mu;
99     g3[70] += mu;
100     g3[72] += lambda;
101     g3[76] += lambda;
102     g3[80] += lambda;
103     g3[80] += mu;
104     g3[80] += mu;
105   } else {
106     int        i, j, k, l;
107     static int cc = -1;
108     cc++;
109     for (i = 0; i < 3; ++i) {
110       for (j = 0; j < 3; ++j) {
111         for (k = 0; k < 3; ++k) {
112           for (l = 0; l < 3; ++l) {
113             if (k == l && i == j) g3[IDX(i, j, k, l)] += lambda;
114             if (i == k && j == l) g3[IDX(i, j, k, l)] += mu;
115             if (i == l && j == k) g3[IDX(i, j, k, l)] += mu;
116             if (k == l && i == j && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += lambda;\n", IDX(i, j, k, l));
117             if (i == k && j == l && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
118             if (i == l && j == k && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l));
119           }
120         }
121       }
122     }
123   }
124 }
125 
126 static void g3_uu_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
127 {
128   PetscReal mu = s_mu, lambda = s_lambda, rad;
129   PetscInt  i;
130   for (i = 0, rad = 0.; i < dim; i++) {
131     PetscReal t = x[i];
132     rad += t * t;
133   }
134   rad = PetscSqrtReal(rad);
135   if (rad > 0.25) {
136     mu *= s_soft_alpha;
137     lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */
138   }
139   g3_uu_3d_private(g3, mu, lambda);
140 }
141 
142 static void g3_uu_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
143 {
144   g3_uu_3d_private(g3, s_mu, s_lambda);
145 }
146 
147 static void g3_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
148 {
149   PetscInt d;
150   for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
151 }
152 
153 static void g3_lap_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
154 {
155   PetscReal lambda = 1, rad;
156   PetscInt  i;
157   for (i = 0, rad = 0.; i < dim; i++) {
158     PetscReal t = x[i];
159     rad += t * t;
160   }
161   rad = PetscSqrtReal(rad);
162   if (rad > 0.25) { lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */ }
163   for (int d = 0; d < dim; ++d) g3[d * dim + d] = lambda;
164 }
165 
166 static void f0_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
167 {
168   const PetscInt Ncomp = dim;
169   PetscInt       comp;
170 
171   for (comp = 0; comp < Ncomp; ++comp) f0[comp] = 0.0;
172 }
173 
174 /* PI_i (x_i^4 - x_i^2) */
175 static void f0_u_x4(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
176 {
177   for (int comp = 0; comp < Nf; ++comp) {
178     f0[comp] = 1e5;
179     for (int i = 0; i < dim; ++i) { f0[comp] *= /* (comp+1)* */ (x[i] * x[i] * x[i] * x[i] - x[i] * x[i]); /* assumes (0,1]^D domain */ }
180   }
181 }
182 
183 PetscErrorCode zero(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, void *ctx)
184 {
185   const PetscInt Ncomp = dim;
186   PetscInt       comp;
187 
188   for (comp = 0; comp < Ncomp; ++comp) u[comp] = 0;
189   return PETSC_SUCCESS;
190 }
191 
192 int main(int argc, char **args)
193 {
194   Mat         Amat;
195   SNES        snes;
196   KSP         ksp;
197   MPI_Comm    comm;
198   PetscMPIInt rank;
199 #if defined(PETSC_USE_LOG)
200   PetscLogStage stage[17];
201 #endif
202   PetscBool test_nonzero_cols = PETSC_FALSE, use_nearnullspace = PETSC_TRUE, attach_nearnullspace = PETSC_FALSE;
203   Vec       xx, bb;
204   PetscInt  iter, i, N, dim = 3, max_conv_its, sizes[7], run_type = 1, Ncomp = dim;
205   DM        dm;
206   PetscBool flg;
207   PetscReal Lx, mdisp[10], err[10];
208   PetscFunctionBeginUser;
209   PetscFunctionBeginUser;
210   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
211   comm = PETSC_COMM_WORLD;
212   PetscCallMPI(MPI_Comm_rank(comm, &rank));
213   /* options */
214   PetscOptionsBegin(comm, NULL, "3D bilinear Q1 elasticity options", "");
215   {
216     Lx           = 1.; /* or ne for rod */
217     max_conv_its = 3;
218     PetscCall(PetscOptionsInt("-max_conv_its", "Number of iterations in convergence study", "", max_conv_its, &max_conv_its, NULL));
219     PetscCheck(max_conv_its > 0 && max_conv_its < 8, PETSC_COMM_WORLD, PETSC_ERR_USER, "Bad number of iterations for convergence test (%" PetscInt_FMT ")", max_conv_its);
220     PetscCall(PetscOptionsReal("-lx", "Length of domain", "", Lx, &Lx, NULL));
221     PetscCall(PetscOptionsReal("-alpha", "material coefficient inside circle", "", s_soft_alpha, &s_soft_alpha, NULL));
222     PetscCall(PetscOptionsBool("-test_nonzero_cols", "nonzero test", "", test_nonzero_cols, &test_nonzero_cols, NULL));
223     PetscCall(PetscOptionsBool("-use_mat_nearnullspace", "MatNearNullSpace API test", "", use_nearnullspace, &use_nearnullspace, NULL));
224     PetscCall(PetscOptionsBool("-attach_mat_nearnullspace", "MatNearNullSpace API test (via MatSetNearNullSpace)", "", attach_nearnullspace, &attach_nearnullspace, NULL));
225     PetscCall(PetscOptionsInt("-run_type", "0: twisting load on cantalever, 1: Elasticty convergence test on cube, 2: Laplacian, 3: soft core Laplacian", "", run_type, &run_type, NULL));
226   }
227   PetscOptionsEnd();
228   PetscCall(PetscLogStageRegister("Mesh Setup", &stage[16]));
229   for (iter = 0; iter < max_conv_its; iter++) {
230     char str[] = "Solve 0";
231     str[6] += iter;
232     PetscCall(PetscLogStageRegister(str, &stage[iter]));
233   }
234   /* create DM, Plex calls DMSetup */
235   PetscCall(PetscLogStagePush(stage[16]));
236   PetscCall(DMCreate(comm, &dm));
237   PetscCall(DMSetType(dm, DMPLEX));
238   PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh"));
239   PetscCall(DMSetFromOptions(dm));
240   PetscCall(DMPlexDistributeSetDefault(dm, PETSC_FALSE));
241   PetscCall(DMGetDimension(dm, &dim));
242   {
243     DMLabel label;
244     IS      is;
245     PetscCall(DMCreateLabel(dm, "boundary"));
246     PetscCall(DMGetLabel(dm, "boundary", &label));
247     PetscCall(DMPlexMarkBoundaryFaces(dm, 1, label));
248     if (run_type == 0) {
249       PetscCall(DMGetStratumIS(dm, "boundary", 1, &is));
250       PetscCall(DMCreateLabel(dm, "Faces"));
251       if (is) {
252         PetscInt        d, f, Nf;
253         const PetscInt *faces;
254         PetscInt        csize;
255         PetscSection    cs;
256         Vec             coordinates;
257         DM              cdm;
258         PetscCall(ISGetLocalSize(is, &Nf));
259         PetscCall(ISGetIndices(is, &faces));
260         PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
261         PetscCall(DMGetCoordinateDM(dm, &cdm));
262         PetscCall(DMGetLocalSection(cdm, &cs));
263         /* Check for each boundary face if any component of its centroid is either 0.0 or 1.0 */
264         for (f = 0; f < Nf; ++f) {
265           PetscReal    faceCoord;
266           PetscInt     b, v;
267           PetscScalar *coords = NULL;
268           PetscInt     Nv;
269           PetscCall(DMPlexVecGetClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
270           Nv = csize / dim; /* Calculate mean coordinate vector */
271           for (d = 0; d < dim; ++d) {
272             faceCoord = 0.0;
273             for (v = 0; v < Nv; ++v) faceCoord += PetscRealPart(coords[v * dim + d]);
274             faceCoord /= Nv;
275             for (b = 0; b < 2; ++b) {
276               if (PetscAbs(faceCoord - b) < PETSC_SMALL) { /* domain have not been set yet, still [0,1]^3 */
277                 PetscCall(DMSetLabelValue(dm, "Faces", faces[f], d * 2 + b + 1));
278               }
279             }
280           }
281           PetscCall(DMPlexVecRestoreClosure(cdm, cs, coordinates, faces[f], &csize, &coords));
282         }
283         PetscCall(ISRestoreIndices(is, &faces));
284       }
285       PetscCall(ISDestroy(&is));
286       PetscCall(DMGetLabel(dm, "Faces", &label));
287       PetscCall(DMPlexLabelComplete(dm, label));
288     }
289   }
290   PetscCall(PetscLogStagePop());
291   for (iter = 0; iter < max_conv_its; iter++) {
292     PetscCall(PetscLogStagePush(stage[16]));
293     /* snes */
294     PetscCall(SNESCreate(comm, &snes));
295     PetscCall(SNESSetDM(snes, dm));
296     PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
297     /* fem */
298     {
299       const PetscInt components[] = {0, 1, 2};
300       const PetscInt Nfid = 1, Npid = 1;
301       PetscInt       fid[] = {1}; /* The fixed faces (x=0) */
302       const PetscInt pid[] = {2}; /* The faces with loading (x=L_x) */
303       PetscFE        fe;
304       PetscDS        prob;
305       DMLabel        label;
306 
307       if (run_type == 2 || run_type == 3) Ncomp = 1;
308       else Ncomp = dim;
309       PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, Ncomp, PETSC_FALSE, NULL, PETSC_DECIDE, &fe));
310       PetscCall(PetscObjectSetName((PetscObject)fe, "deformation"));
311       /* FEM prob */
312       PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
313       PetscCall(DMCreateDS(dm));
314       PetscCall(DMGetDS(dm, &prob));
315       /* setup problem */
316       if (run_type == 1) { // elast
317         PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d));
318         PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_3d));
319       } else if (run_type == 0) { //twisted not maintained
320         PetscWeakForm wf;
321         PetscInt      bd, i;
322         PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d_alpha));
323         PetscCall(PetscDSSetResidual(prob, 0, f0_u, f1_u_3d_alpha));
324         PetscCall(DMGetLabel(dm, "Faces", &label));
325         PetscCall(DMAddBoundary(dm, DM_BC_NATURAL, "traction", label, Npid, pid, 0, Ncomp, components, NULL, NULL, NULL, &bd));
326         PetscCall(PetscDSGetBoundary(prob, bd, &wf, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
327         for (i = 0; i < Npid; ++i) PetscCall(PetscWeakFormSetIndexBdResidual(wf, label, pid[i], 0, 0, 0, f0_bd_u_3d, 0, f1_bd_u));
328       } else if (run_type == 2) { // Laplacian
329         PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap));
330         PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap));
331       } else if (run_type == 3) { // soft core Laplacian
332         PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap_alpha));
333         PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap));
334       }
335       /* bcs */
336       if (run_type != 0) {
337         PetscInt id = 1;
338         PetscCall(DMGetLabel(dm, "boundary", &label));
339         PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))zero, NULL, NULL, NULL));
340       } else {
341         PetscCall(DMGetLabel(dm, "Faces", &label));
342         PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "fixed", label, Nfid, fid, 0, Ncomp, components, (void (*)(void))zero, NULL, NULL, NULL));
343       }
344       PetscCall(PetscFEDestroy(&fe));
345     }
346     /* vecs & mat */
347     PetscCall(DMCreateGlobalVector(dm, &xx));
348     PetscCall(VecDuplicate(xx, &bb));
349     PetscCall(PetscObjectSetName((PetscObject)bb, "b"));
350     PetscCall(PetscObjectSetName((PetscObject)xx, "u"));
351     PetscCall(DMCreateMatrix(dm, &Amat));
352     PetscCall(MatSetOption(Amat, MAT_SYMMETRIC, PETSC_TRUE));        /* Some matrix kernels can take advantage of symmetry if we set this. */
353     PetscCall(MatSetOption(Amat, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); /* Inform PETSc that Amat is always symmetric, so info set above isn't lost. */
354     PetscCall(MatSetBlockSize(Amat, Ncomp));
355     PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
356     PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
357     PetscCall(VecGetSize(bb, &N));
358     sizes[iter] = N;
359     PetscCall(PetscInfo(snes, "%" PetscInt_FMT " global equations, %" PetscInt_FMT " vertices\n", N, N / dim));
360     if ((use_nearnullspace || attach_nearnullspace) && N / dim > 1 && Ncomp > 1) {
361       /* Set up the near null space (a.k.a. rigid body modes) that will be used by the multigrid preconditioner */
362       DM           subdm;
363       MatNullSpace nearNullSpace;
364       PetscInt     fields = 0;
365       PetscObject  deformation;
366       PetscCall(DMCreateSubDM(dm, 1, &fields, NULL, &subdm));
367       PetscCall(DMPlexCreateRigidBody(subdm, 0, &nearNullSpace));
368       PetscCall(DMGetField(dm, 0, NULL, &deformation));
369       PetscCall(PetscObjectCompose(deformation, "nearnullspace", (PetscObject)nearNullSpace));
370       PetscCall(DMDestroy(&subdm));
371       if (attach_nearnullspace) PetscCall(MatSetNearNullSpace(Amat, nearNullSpace));
372       PetscCall(MatNullSpaceDestroy(&nearNullSpace)); /* created by DM and destroyed by Mat */
373     }
374     PetscCall(DMPlexSetSNESLocalFEM(dm, NULL, NULL, NULL));
375     PetscCall(SNESSetJacobian(snes, Amat, Amat, NULL, NULL));
376     PetscCall(SNESSetFromOptions(snes));
377     PetscCall(DMSetUp(dm));
378     PetscCall(PetscLogStagePop());
379     PetscCall(PetscLogStagePush(stage[16]));
380     /* ksp */
381     PetscCall(SNESGetKSP(snes, &ksp));
382     PetscCall(KSPSetComputeSingularValues(ksp, PETSC_TRUE));
383     /* test BCs */
384     PetscCall(VecZeroEntries(xx));
385     if (test_nonzero_cols) {
386       if (rank == 0) PetscCall(VecSetValue(xx, 0, 1.0, INSERT_VALUES));
387       PetscCall(VecAssemblyBegin(xx));
388       PetscCall(VecAssemblyEnd(xx));
389     }
390     PetscCall(VecZeroEntries(bb));
391     PetscCall(VecGetSize(bb, &i));
392     sizes[iter] = i;
393     PetscCall(PetscInfo(snes, "%" PetscInt_FMT " equations in vector, %" PetscInt_FMT " vertices\n", i, i / dim));
394     PetscCall(PetscLogStagePop());
395     /* solve */
396     PetscCall(SNESComputeJacobian(snes, xx, Amat, Amat));
397     PetscCall(MatViewFromOptions(Amat, NULL, "-my_mat_view"));
398     PetscCall(PetscLogStagePush(stage[iter]));
399     PetscCall(SNESSolve(snes, bb, xx));
400     PetscCall(PetscLogStagePop());
401     PetscCall(VecNorm(xx, NORM_INFINITY, &mdisp[iter]));
402     {
403       PetscViewer       viewer = NULL;
404       PetscViewerFormat fmt;
405       PetscCall(PetscOptionsGetViewer(comm, NULL, "", "-vec_view", &viewer, &fmt, &flg));
406       if (flg) {
407         PetscCall(PetscViewerPushFormat(viewer, fmt));
408         PetscCall(VecView(xx, viewer));
409         PetscCall(VecView(bb, viewer));
410         PetscCall(PetscViewerPopFormat(viewer));
411       }
412       PetscCall(PetscViewerDestroy(&viewer));
413     }
414     /* Free work space */
415     PetscCall(SNESDestroy(&snes));
416     PetscCall(VecDestroy(&xx));
417     PetscCall(VecDestroy(&bb));
418     PetscCall(MatDestroy(&Amat));
419     if (iter + 1 < max_conv_its) {
420       DM newdm;
421       PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view"));
422       PetscCall(DMRefine(dm, comm, &newdm));
423       if (rank == -1) {
424         PetscDS prob;
425         PetscCall(DMGetDS(dm, &prob));
426         PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view"));
427         PetscCall(DMGetDS(newdm, &prob));
428         PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view"));
429       }
430       PetscCall(DMDestroy(&dm));
431       dm = newdm;
432       PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh"));
433       PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view"));
434       PetscCall(DMSetFromOptions(dm));
435     }
436   }
437   PetscCall(DMDestroy(&dm));
438   if (run_type == 1) err[0] = 5.97537599375e+01 - mdisp[0]; /* error with what I think is the exact solution */
439   else if (run_type == 0) err[0] = 0;
440   else if (run_type == 2) err[0] = 3.527795e+01 - mdisp[0];
441   else err[0] = 0;
442   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %d) N=%12" PetscInt_FMT ", max displ=%9.7e, error=%4.3e\n", rank, 0, sizes[0], (double)mdisp[0], (double)err[0]));
443   for (iter = 1; iter < max_conv_its; iter++) {
444     if (run_type == 1) err[iter] = 5.97537599375e+01 - mdisp[iter];
445     else if (run_type == 0) err[iter] = 0;
446     else if (run_type == 2) err[iter] = 3.527795e+01 - mdisp[iter];
447     else err[iter] = 0;
448     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %" PetscInt_FMT ") N=%12" PetscInt_FMT ", max displ=%9.7e, disp diff=%9.2e, error=%4.3e, rate=%3.2g\n", rank, iter, sizes[iter], (double)mdisp[iter], (double)(mdisp[iter] - mdisp[iter - 1]), (double)err[iter], (double)(PetscLogReal(PetscAbs(err[iter - 1] / err[iter])) / PetscLogReal(2.))));
449   }
450 
451   PetscCall(PetscFinalize());
452   return 0;
453 }
454 
455 /*TEST
456 
457   testset:
458     nsize: 4
459     requires: !single
460     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 1 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.001 -ksp_converged_reason  -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -my_dm_view -snes_lag_jacobian -2 -snes_type ksponly
461     timeoutfactor: 2
462     test:
463       suffix: 0
464       args: -run_type 1
465     test:
466       suffix: 1
467       args: -run_type 2
468 
469   # HYPRE PtAP broken with complex numbers
470   test:
471     suffix: hypre
472     requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
473     nsize: 4
474     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -pc_type hypre -pc_hypre_type boomeramg -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -ksp_converged_reason -use_mat_nearnullspace true -petscpartitioner_type simple
475 
476   test:
477     suffix: ml
478     requires: ml !single
479     nsize: 4
480     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type ml -mg_levels_ksp_type chebyshev -mg_levels_ksp_max_it 3 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type sor -petscpartitioner_type simple -use_mat_nearnullspace
481 
482   test:
483     suffix: hpddm
484     requires: hpddm slepc !single defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES)
485     nsize: 4
486     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fgmres -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type hpddm -petscpartitioner_type simple -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 6 -pc_hpddm_coarse_p 1 -pc_hpddm_coarse_pc_type svd
487 
488   test:
489     suffix: repart
490     nsize: 4
491     requires: parmetis !single
492     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 4 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-2 -ksp_norm_type unpreconditioned -snes_rtol 1.e-3 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -pc_gamg_mat_partitioning_type parmetis -pc_gamg_repartition true  -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 10 -ksp_converged_reason  -pc_gamg_reuse_interpolation true -petscpartitioner_type simple
493 
494   test:
495     suffix: bddc
496     nsize: 4
497     requires: !single
498     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type {{sbaij baij aij}} -pc_type bddc
499 
500   testset:
501     nsize: 4
502     requires: !single
503     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-10 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type aij -pc_type bddc -attach_mat_nearnullspace {{0 1}separate output}
504     test:
505       suffix: bddc_approx_gamg
506       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
507     # HYPRE PtAP broken with complex numbers
508     test:
509       requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE)
510       suffix: bddc_approx_hypre
511       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop -prefix_push pc_bddc_neumann_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop
512     test:
513       requires: ml
514       suffix: bddc_approx_ml
515       args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop
516 
517   test:
518     suffix: fetidp
519     nsize: 4
520     requires: !single
521     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type {{sbaij baij aij}}
522 
523   test:
524     suffix: bddc_elast
525     nsize: 4
526     requires: !single
527     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type sbaij -pc_type bddc -pc_bddc_monolithic -attach_mat_nearnullspace
528 
529   test:
530     suffix: fetidp_elast
531     nsize: 4
532     requires: !single
533     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type sbaij -fetidp_bddc_pc_bddc_monolithic -attach_mat_nearnullspace
534 
535   test:
536     suffix: gdsw
537     nsize: 4
538     requires: !single
539     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -attach_mat_nearnullspace \
540           -pc_type mg -pc_mg_galerkin -pc_mg_adapt_interp_coarse_space gdsw -pc_mg_levels 2 -mg_levels_pc_type bjacobi -mg_levels_sub_pc_type icc
541 
542   testset:
543     nsize: 4
544     requires: !single
545     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -ksp_monitor_short -ksp_converged_reason  -snes_monitor_short -dm_view -petscpartitioner_type simple -pc_gamg_process_eq_limit 20
546     output_file: output/ex56_cuda.out
547 
548     test:
549       suffix: cuda
550       requires: cuda
551       args: -dm_mat_type aijcusparse -dm_vec_type cuda
552 
553     test:
554       suffix: hip
555       requires: hip
556       args: -dm_mat_type aijhipsparse -dm_vec_type hip
557 
558     test:
559       suffix: viennacl
560       requires: viennacl
561       args: -dm_mat_type aijviennacl -dm_vec_type viennacl
562 
563     test:
564       suffix: kokkos
565       requires: kokkos_kernels
566       args: -dm_mat_type aijkokkos -dm_vec_type kokkos
567   # Don't run AIJMKL caes with complex scalars because of convergence issues.
568   # Note that we need to test both single and multiple MPI rank cases, because these use different sparse MKL routines to implement the PtAP operation.
569   test:
570     suffix: seqaijmkl
571     nsize: 1
572     requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
573     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl
574     timeoutfactor: 2
575 
576   test:
577     suffix: mpiaijmkl
578     nsize: 4
579     requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex
580     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl
581     timeoutfactor: 2
582 
583 TEST*/
584