xref: /libCEED/tests/t541-operator.c (revision 75a19770cda24d00f4da8d6890f256f14dd4565b)
1 /// @file
2 /// Test creation and use of FDM element inverse
3 /// \test Test creation and use of FDM element inverse
4 #include <ceed.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <math.h>
8 #include "t541-operator.h"
9 
10 int main(int argc, char **argv) {
11   Ceed ceed;
12   CeedElemRestriction elem_restr_x_i, elem_restr_u_i, elem_restr_qd_i;
13   CeedBasis basis_x, basis_u;
14   CeedQFunction qf_setup_diff, qf_apply;
15   CeedOperator op_setup_diff, op_apply, op_inv;
16   CeedVector q_data_diff, X, U, V, W;
17   CeedInt num_elem = 1, P = 4, Q = 5, dim = 2;
18   CeedInt num_dofs = P*P, num_qpts = num_elem*Q*Q, q_data_size = dim*(dim+1)/2;
19   CeedScalar x[dim*num_elem*(2*2)];
20 
21   CeedInit(argv[1], &ceed);
22 
23   // Test skipped if using single precision
24   if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
25     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
26                      "Test not implemented in single precision");
27   }
28 
29   // DoF Coordinates
30   for (CeedInt i=0; i<2; i++)
31     for (CeedInt j=0; j<2; j++) {
32       x[i+j*2+0*4] = i;
33       x[i+j*2+1*4] = j;
34     }
35   CeedVectorCreate(ceed, dim*num_elem*(2*2), &X);
36   CeedVectorSetArray(X, CEED_MEM_HOST, CEED_USE_POINTER, x);
37 
38   // Qdata Vector
39   CeedVectorCreate(ceed, q_data_size*num_qpts, &q_data_diff);
40 
41   // Element Setup
42 
43   // Restrictions
44   CeedInt strides_x[3] = {1, 2*2, 2*2*dim};
45   CeedElemRestrictionCreateStrided(ceed, num_elem, 2*2, dim, dim*num_elem*2*2,
46                                    strides_x, &elem_restr_x_i);
47 
48   CeedInt strides_u[3] = {1, P*P, P*P};
49   CeedElemRestrictionCreateStrided(ceed, num_elem, P*P, 1, num_dofs, strides_u,
50                                    &elem_restr_u_i);
51 
52   CeedInt strides_qd[3] = {1, Q*Q, q_data_size *Q*Q};
53   CeedElemRestrictionCreateStrided(ceed, num_elem, Q*Q, q_data_size,
54                                    num_qpts*q_data_size, strides_qd, &elem_restr_qd_i);
55 
56   // Bases
57   CeedBasisCreateTensorH1Lagrange(ceed, dim, dim, 2, Q, CEED_GAUSS, &basis_x);
58   CeedBasisCreateTensorH1Lagrange(ceed, dim, 1, P, Q, CEED_GAUSS, &basis_u);
59 
60   // QFunction - setup diff
61   CeedQFunctionCreateInterior(ceed, 1, setup_diff, setup_diff_loc,
62                               &qf_setup_diff);
63   CeedQFunctionAddInput(qf_setup_diff, "dx", dim*dim, CEED_EVAL_GRAD);
64   CeedQFunctionAddInput(qf_setup_diff, "weight", 1, CEED_EVAL_WEIGHT);
65   CeedQFunctionAddOutput(qf_setup_diff, "qdata", q_data_size, CEED_EVAL_NONE);
66 
67   // Operator - setup diff
68   CeedOperatorCreate(ceed, qf_setup_diff, CEED_QFUNCTION_NONE,
69                      CEED_QFUNCTION_NONE, &op_setup_diff);
70   CeedOperatorSetField(op_setup_diff, "dx", elem_restr_x_i, basis_x,
71                        CEED_VECTOR_ACTIVE);
72   CeedOperatorSetField(op_setup_diff, "weight", CEED_ELEMRESTRICTION_NONE,
73                        basis_x, CEED_VECTOR_NONE);
74   CeedOperatorSetField(op_setup_diff, "qdata", elem_restr_qd_i,
75                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
76 
77   // Apply Setup Operator
78   CeedOperatorApply(op_setup_diff, X, q_data_diff, CEED_REQUEST_IMMEDIATE);
79 
80   // QFunction - apply
81   CeedQFunctionCreateInterior(ceed, 1, apply, apply_loc, &qf_apply);
82   CeedQFunctionAddInput(qf_apply, "u", dim, CEED_EVAL_GRAD);
83   CeedQFunctionAddInput(qf_apply, "qdata_diff", q_data_size, CEED_EVAL_NONE);
84   CeedQFunctionAddOutput(qf_apply, "v", dim, CEED_EVAL_GRAD);
85 
86   // Operator - apply
87   CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE,
88                      &op_apply);
89   CeedOperatorSetField(op_apply, "u", elem_restr_u_i, basis_u,
90                        CEED_VECTOR_ACTIVE);
91   CeedOperatorSetField(op_apply, "qdata_diff", elem_restr_qd_i,
92                        CEED_BASIS_COLLOCATED, q_data_diff);
93   CeedOperatorSetField(op_apply, "v", elem_restr_u_i, basis_u,
94                        CEED_VECTOR_ACTIVE);
95 
96   // Create FDM element inverse
97   CeedOperatorCreateFDMElementInverse(op_apply, &op_inv, CEED_REQUEST_IMMEDIATE);
98 
99   // Create vectors
100   CeedVectorCreate(ceed, num_dofs, &U);
101   CeedVectorCreate(ceed, num_dofs, &V);
102   CeedVectorCreate(ceed, num_dofs, &W);
103 
104   // Create Schur complement for element corners
105   CeedScalar S[16];
106   for (CeedInt i = 0; i < 4; i++) {
107     CeedScalar *u;
108     CeedVectorSetValue(U, 0.0);
109     CeedVectorGetArray(U, CEED_MEM_HOST, &u);
110     switch (i) {
111     case 0: u[0]     = 1.0; break;
112     case 1: u[P-1]   = 1.0; break;
113     case 2: u[P*P-P] = 1.0; break;
114     case 3: u[P*P-1] = 1.0; break;
115     }
116     CeedVectorRestoreArray(U, &u);
117 
118     CeedOperatorApply(op_inv, U, V, CEED_REQUEST_IMMEDIATE);
119 
120     const CeedScalar *v;
121     CeedVectorGetArrayRead(V, CEED_MEM_HOST, &v);
122     S[0*4+i] = -v[0];
123     S[1*4+i] = -v[P-1];
124     S[2*4+i] = -v[P*P-P];
125     S[3*4+i] = -v[P*P-1];
126     CeedVectorRestoreArrayRead(V, &v);
127   }
128   CeedScalar S_inv[16];
129   {
130     CeedScalar det;
131     S_inv[0] =  S[5]  * S[10] * S[15] -
132                 S[5]  * S[11] * S[14] -
133                 S[9]  * S[6]  * S[15] +
134                 S[9]  * S[7]  * S[14] +
135                 S[13] * S[6]  * S[11] -
136                 S[13] * S[7]  * S[10];
137 
138     S_inv[4] =  -S[4] * S[10] * S[15] +
139                 S[4]  * S[11] * S[14] +
140                 S[8]  * S[6]  * S[15] -
141                 S[8]  * S[7]  * S[14] -
142                 S[12] * S[6]  * S[11] +
143                 S[12] * S[7]  * S[10];
144 
145     S_inv[8] =  S[4]  * S[9]  * S[15] -
146                 S[4]  * S[11] * S[13] -
147                 S[8]  * S[5]  * S[15] +
148                 S[8]  * S[7]  * S[13] +
149                 S[12] * S[5]  * S[11] -
150                 S[12] * S[7]  * S[9];
151 
152     S_inv[12] = -S[4] * S[9]  * S[14] +
153                 S[4]  * S[10] * S[13] +
154                 S[8]  * S[5]  * S[14] -
155                 S[8]  * S[6]  * S[13] -
156                 S[12] * S[5]  * S[10] +
157                 S[12] * S[6]  * S[9];
158 
159     S_inv[1] =  -S[1] * S[10] * S[15] +
160                 S[1]  * S[11] * S[14] +
161                 S[9]  * S[2]  * S[15] -
162                 S[9]  * S[3]  * S[14] -
163                 S[13] * S[2]  * S[11] +
164                 S[13] * S[3]  * S[10];
165 
166     S_inv[5] =  S[0]  * S[10] * S[15] -
167                 S[0]  * S[11] * S[14] -
168                 S[8]  * S[2]  * S[15] +
169                 S[8]  * S[3]  * S[14] +
170                 S[12] * S[2]  * S[11] -
171                 S[12] * S[3]  * S[10];
172 
173     S_inv[9] =  -S[0] * S[9]  * S[15] +
174                 S[0]  * S[11] * S[13] +
175                 S[8]  * S[1]  * S[15] -
176                 S[8]  * S[3]  * S[13] -
177                 S[12] * S[1]  * S[11] +
178                 S[12] * S[3]  * S[9];
179 
180     S_inv[13] = S[0]  * S[9]  * S[14] -
181                 S[0]  * S[10] * S[13] -
182                 S[8]  * S[1]  * S[14] +
183                 S[8]  * S[2]  * S[13] +
184                 S[12] * S[1]  * S[10] -
185                 S[12] * S[2]  * S[9];
186 
187     S_inv[2] =  S[1]  * S[6]  * S[15] -
188                 S[1]  * S[7]  * S[14] -
189                 S[5]  * S[2]  * S[15] +
190                 S[5]  * S[3]  * S[14] +
191                 S[13] * S[2]  * S[7]  -
192                 S[13] * S[3]  * S[6];
193 
194     S_inv[6] =  -S[0] * S[6]  * S[15] +
195                 S[0]  * S[7]  * S[14] +
196                 S[4]  * S[2]  * S[15] -
197                 S[4]  * S[3]  * S[14] -
198                 S[12] * S[2]  * S[7] +
199                 S[12] * S[3]  * S[6];
200 
201     S_inv[10] = S[0]  * S[5]  * S[15] -
202                 S[0]  * S[7]  * S[13] -
203                 S[4]  * S[1]  * S[15] +
204                 S[4]  * S[3]  * S[13] +
205                 S[12] * S[1]  * S[7]  -
206                 S[12] * S[3]  * S[5];
207 
208     S_inv[14] = -S[0] * S[5]  * S[14] +
209                 S[0]  * S[6]  * S[13] +
210                 S[4]  * S[1]  * S[14] -
211                 S[4]  * S[2]  * S[13] -
212                 S[12] * S[1]  * S[6]  +
213                 S[12] * S[2]  * S[5];
214 
215     S_inv[3] =  -S[1] * S[6]  * S[11] +
216                 S[1]  * S[7]  * S[10] +
217                 S[5]  * S[2]  * S[11] -
218                 S[5]  * S[3]  * S[10] -
219                 S[9]  * S[2]  * S[7]  +
220                 S[9]  * S[3]  * S[6];
221 
222     S_inv[7] =  S[0]  * S[6]  * S[11] -
223                 S[0]  * S[7]  * S[10] -
224                 S[4]  * S[2]  * S[11] +
225                 S[4]  * S[3]  * S[10] +
226                 S[8]  * S[2]  * S[7]  -
227                 S[8]  * S[3]  * S[6];
228 
229     S_inv[11] = -S[0] * S[5]  * S[11] +
230                 S[0]  * S[7]  * S[9]  +
231                 S[4]  * S[1]  * S[11] -
232                 S[4]  * S[3]  * S[9]  -
233                 S[8]  * S[1]  * S[7]  +
234                 S[8]  * S[3]  * S[5];
235 
236     S_inv[15] = S[0]  * S[5]  * S[10] -
237                 S[0]  * S[6]  * S[9]  -
238                 S[4]  * S[1]  * S[10] +
239                 S[4]  * S[2]  * S[9]  +
240                 S[8]  * S[1]  * S[6]  -
241                 S[8]  * S[2]  * S[5];
242 
243     det = 1/(S[0]*S_inv[0] + S[1]*S_inv[4] + S[2]*S_inv[8] + S[3]*S_inv[12]);
244 
245     for (CeedInt i = 0; i < 16; i++)
246       S_inv[i] *= det;
247   }
248 
249   // Set initial values
250   {
251     CeedScalar nodes[P];
252     CeedLobattoQuadrature(P, nodes, NULL);
253     CeedScalar *u;
254     CeedVectorGetArray(U, CEED_MEM_HOST, &u);
255     for (CeedInt i=0; i<P; i++)
256       for (CeedInt j=0; j<P; j++)
257         u[i*P+j] = -(nodes[i] - 1.0)*(nodes[i] + 1.0) -
258                    (nodes[j] - 1.0)*(nodes[j] + 1.0);
259     CeedVectorRestoreArray(U, &u);
260   }
261 
262   // Apply original operator
263   CeedOperatorApply(op_apply, U, V, CEED_REQUEST_IMMEDIATE);
264 
265   // Apply FDM element inverse
266   {
267     // -- Zero corners
268     CeedScalar *v;
269     CeedVectorGetArray(V, CEED_MEM_HOST, &v);
270     v[0]     = 0.0;
271     v[P-1]   = 0.0;
272     v[P*P-P] = 0.0;
273     v[P*P-1] = 0.0;
274     CeedVectorRestoreArray(V, &v);
275 
276     // -- Apply FDM inverse to interior
277     CeedOperatorApply(op_inv, V, W, CEED_REQUEST_IMMEDIATE);
278 
279     // -- Pick off corners
280     const CeedScalar *w;
281     CeedScalar w_Pi[4];
282     CeedVectorGetArrayRead(W, CEED_MEM_HOST, &w);
283     w_Pi[0] = w[0];
284     w_Pi[1] = w[P-1];
285     w_Pi[2] = w[P*P-P];
286     w_Pi[3] = w[P*P-1];
287     CeedVectorRestoreArrayRead(W, &w);
288 
289     // -- Apply inverse of Schur complement
290     CeedScalar v_Pi[4];
291     for (CeedInt i=0; i<4; i++) {
292       CeedScalar sum = 0.0;
293       for (CeedInt j=0; j<4; j++) {
294         sum += w_Pi[j] * S_inv[i*4+j];
295       }
296       v_Pi[i] = sum;
297     }
298 
299     // -- Set corners
300     CeedVectorGetArray(V, CEED_MEM_HOST, &v);
301     v[0]     = v_Pi[0];
302     v[P-1]   = v_Pi[1];
303     v[P*P-P] = v_Pi[2];
304     v[P*P-1] = v_Pi[3];
305     CeedVectorRestoreArray(V, &v);
306 
307     // -- Apply full FDM inverse again
308     CeedOperatorApply(op_inv, V, W, CEED_REQUEST_IMMEDIATE);
309   }
310 
311 
312   // Check output
313   {
314     const CeedScalar *u, *w;
315     CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u);
316     CeedVectorGetArrayRead(W, CEED_MEM_HOST, &w);
317     for (CeedInt i=0; i<P; i++)
318       for (CeedInt j=0; j<P; j++)
319         if (fabs(u[i*P+j] - w[i*P+j]) > 2e-3)
320           // LCOV_EXCL_START
321           printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in inverse: %e != %e\n",
322                  i, j, w[i*P+j], u[i*P+j]);
323     // LCOV_EXCL_STOP
324     CeedVectorRestoreArrayRead(U, &u);
325     CeedVectorRestoreArrayRead(W, &w);
326   }
327 
328   // Cleanup
329   CeedQFunctionDestroy(&qf_setup_diff);
330   CeedQFunctionDestroy(&qf_apply);
331   CeedOperatorDestroy(&op_setup_diff);
332   CeedOperatorDestroy(&op_apply);
333   CeedOperatorDestroy(&op_inv);
334   CeedElemRestrictionDestroy(&elem_restr_u_i);
335   CeedElemRestrictionDestroy(&elem_restr_x_i);
336   CeedElemRestrictionDestroy(&elem_restr_qd_i);
337   CeedBasisDestroy(&basis_x);
338   CeedBasisDestroy(&basis_u);
339   CeedVectorDestroy(&X);
340   CeedVectorDestroy(&q_data_diff);
341   CeedVectorDestroy(&U);
342   CeedVectorDestroy(&V);
343   CeedVectorDestroy(&W);
344   CeedDestroy(&ceed);
345   return 0;
346 }
347