xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 3d8e882215d238700cdceb37404f76ca7fa24eaa)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <stdbool.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include "ceed-blocked.h"
14 
15 //------------------------------------------------------------------------------
16 // Setup Input/Output Fields
17 //------------------------------------------------------------------------------
18 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
19     CeedOperator op, bool is_input, CeedElemRestriction *blk_restr,
20     CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs,
21     CeedInt start_e, CeedInt num_fields, CeedInt Q) {
22   CeedInt ierr, num_comp, size, P;
23   Ceed ceed;
24   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
25   CeedBasis basis;
26   CeedElemRestriction r;
27   CeedOperatorField *op_fields;
28   CeedQFunctionField *qf_fields;
29   if (is_input) {
30     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
31     CeedChkBackend(ierr);
32     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
33     CeedChkBackend(ierr);
34   } else {
35     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields);
36     CeedChkBackend(ierr);
37     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
38     CeedChkBackend(ierr);
39   }
40   const CeedInt blk_size = 8;
41 
42   // Loop over fields
43   for (CeedInt i=0; i<num_fields; i++) {
44     CeedEvalMode eval_mode;
45     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
46     CeedChkBackend(ierr);
47 
48     if (eval_mode != CEED_EVAL_WEIGHT) {
49       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
50       CeedChkBackend(ierr);
51       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
52       CeedSize l_size;
53       CeedInt num_elem, elem_size, comp_stride;
54       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
55       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
56       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
57       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
58 
59       bool strided;
60       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
61       if (strided) {
62         CeedInt strides[3];
63         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
64         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
65                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
66         CeedChkBackend(ierr);
67       } else {
68         const CeedInt *offsets = NULL;
69         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
70         CeedChkBackend(ierr);
71         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
72         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
73                                                 blk_size, num_comp, comp_stride,
74                                                 l_size, CEED_MEM_HOST,
75                                                 CEED_COPY_VALUES, offsets,
76                                                 &blk_restr[i+start_e]);
77         CeedChkBackend(ierr);
78         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
79       }
80       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
81                                              &e_vecs_full[i+start_e]);
82       CeedChkBackend(ierr);
83     }
84 
85     switch(eval_mode) {
86     case CEED_EVAL_NONE:
87       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
88       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
89       CeedChkBackend(ierr);
90       break;
91     case CEED_EVAL_INTERP:
92     case CEED_EVAL_GRAD:
93       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
94       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
95       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
96       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
97       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
98       CeedChkBackend(ierr);
99       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
100       CeedChkBackend(ierr);
101       break;
102     case CEED_EVAL_WEIGHT: // Only on input fields
103       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
104       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
105       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
106                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
107       CeedChkBackend(ierr);
108 
109       break;
110     case CEED_EVAL_DIV:
111       break; // Not implemented
112     case CEED_EVAL_CURL:
113       break; // Not implemented
114     }
115   }
116   return CEED_ERROR_SUCCESS;
117 }
118 
119 //------------------------------------------------------------------------------
120 // Setup Operator
121 //------------------------------------------------------------------------------
122 static int CeedOperatorSetup_Blocked(CeedOperator op) {
123   int ierr;
124   bool is_setup_done;
125   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
126   if (is_setup_done) return CEED_ERROR_SUCCESS;
127   Ceed ceed;
128   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
129   CeedOperator_Blocked *impl;
130   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
131   CeedQFunction qf;
132   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
133   CeedInt Q, num_input_fields, num_output_fields;
134   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
135   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
136   CeedOperatorField *op_input_fields, *op_output_fields;
137   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
138                                &num_output_fields, &op_output_fields);
139   CeedChkBackend(ierr);
140   CeedQFunctionField *qf_input_fields, *qf_output_fields;
141   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
142                                 &qf_output_fields);
143   CeedChkBackend(ierr);
144 
145   // Allocate
146   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
147   CeedChkBackend(ierr);
148   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
149   CeedChkBackend(ierr);
150 
151   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
152   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
153   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
154   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
155   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
156 
157   impl->num_inputs = num_input_fields;
158   impl->num_outputs = num_output_fields;
159 
160   // Set up infield and outfield pointer arrays
161   // Infields
162   ierr = CeedOperatorSetupFields_Blocked(qf, op, true, impl->blk_restr,
163                                          impl->e_vecs_full, impl->e_vecs_in,
164                                          impl->q_vecs_in, 0,
165                                          num_input_fields, Q);
166   CeedChkBackend(ierr);
167   // Outfields
168   ierr = CeedOperatorSetupFields_Blocked(qf, op, false, impl->blk_restr,
169                                          impl->e_vecs_full, impl->e_vecs_out,
170                                          impl->q_vecs_out, num_input_fields,
171                                          num_output_fields, Q);
172   CeedChkBackend(ierr);
173 
174   // Identity QFunctions
175   if (impl->is_identity_qf) {
176     CeedEvalMode in_mode, out_mode;
177     CeedQFunctionField *in_fields, *out_fields;
178     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
179     CeedChkBackend(ierr);
180     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
181     CeedChkBackend(ierr);
182     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
183     CeedChkBackend(ierr);
184 
185     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
186       impl->is_identity_restr_op = true;
187     } else {
188       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
189       impl->q_vecs_out[0] = impl->q_vecs_in[0];
190       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
191     }
192   }
193 
194   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
195 
196   return CEED_ERROR_SUCCESS;
197 }
198 
199 //------------------------------------------------------------------------------
200 // Setup Operator Inputs
201 //------------------------------------------------------------------------------
202 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
203     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
204     CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
205     CeedOperator_Blocked *impl, CeedRequest *request) {
206   CeedInt ierr;
207   CeedEvalMode eval_mode;
208   CeedVector vec;
209   uint64_t state;
210 
211   for (CeedInt i=0; i<num_input_fields; i++) {
212     // Get input vector
213     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
214     CeedChkBackend(ierr);
215     if (vec == CEED_VECTOR_ACTIVE) {
216       if (skip_active)
217         continue;
218       else
219         vec = in_vec;
220     }
221 
222     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
223     CeedChkBackend(ierr);
224     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
225     } else {
226       // Restrict
227       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
228       if (state != impl->input_states[i] || vec == in_vec) {
229         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
230                                         vec, impl->e_vecs_full[i], request);
231         CeedChkBackend(ierr);
232         impl->input_states[i] = state;
233       }
234       // Get evec
235       ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
236                                     (const CeedScalar **) &e_data_full[i]);
237       CeedChkBackend(ierr);
238     }
239   }
240   return CEED_ERROR_SUCCESS;
241 }
242 
243 //------------------------------------------------------------------------------
244 // Input Basis Action
245 //------------------------------------------------------------------------------
246 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
247     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
248     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
249     CeedScalar *e_data_full[2*CEED_FIELD_MAX], CeedOperator_Blocked *impl) {
250   CeedInt ierr;
251   CeedInt dim, elem_size, size;
252   CeedElemRestriction elem_restr;
253   CeedEvalMode eval_mode;
254   CeedBasis basis;
255 
256   for (CeedInt i=0; i<num_input_fields; i++) {
257     // Skip active input
258     if (skip_active) {
259       CeedVector vec;
260       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
261       CeedChkBackend(ierr);
262       if (vec == CEED_VECTOR_ACTIVE)
263         continue;
264     }
265 
266     // Get elem_size, eval_mode, size
267     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
268     CeedChkBackend(ierr);
269     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
270     CeedChkBackend(ierr);
271     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
272     CeedChkBackend(ierr);
273     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
274     CeedChkBackend(ierr);
275     // Basis action
276     switch(eval_mode) {
277     case CEED_EVAL_NONE:
278       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
279                                 CEED_USE_POINTER, &e_data_full[i][e*Q*size]);
280       CeedChkBackend(ierr);
281       break;
282     case CEED_EVAL_INTERP:
283       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
284       CeedChkBackend(ierr);
285       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
286                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size]);
287       CeedChkBackend(ierr);
288       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
289                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
290                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
291       break;
292     case CEED_EVAL_GRAD:
293       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
294       CeedChkBackend(ierr);
295       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
296       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
297                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size/dim]);
298       CeedChkBackend(ierr);
299       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
300                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
301                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
302       break;
303     case CEED_EVAL_WEIGHT:
304       break;  // No action
305     // LCOV_EXCL_START
306     case CEED_EVAL_DIV:
307     case CEED_EVAL_CURL: {
308       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
309       CeedChkBackend(ierr);
310       Ceed ceed;
311       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
312       return CeedError(ceed, CEED_ERROR_BACKEND,
313                        "Ceed evaluation mode not implemented");
314       // LCOV_EXCL_STOP
315     }
316     }
317   }
318   return CEED_ERROR_SUCCESS;
319 }
320 
321 //------------------------------------------------------------------------------
322 // Output Basis Action
323 //------------------------------------------------------------------------------
324 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
325     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
326     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
327     CeedOperator op, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
328     CeedOperator_Blocked *impl) {
329   CeedInt ierr;
330   CeedInt dim, elem_size, size;
331   CeedElemRestriction elem_restr;
332   CeedEvalMode eval_mode;
333   CeedBasis basis;
334 
335   for (CeedInt i=0; i<num_output_fields; i++) {
336     // Get elem_size, eval_mode, size
337     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
338     CeedChkBackend(ierr);
339     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
340     CeedChkBackend(ierr);
341     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
342     CeedChkBackend(ierr);
343     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
344     CeedChkBackend(ierr);
345     // Basis action
346     switch(eval_mode) {
347     case CEED_EVAL_NONE:
348       break; // No action
349     case CEED_EVAL_INTERP:
350       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
351       CeedChkBackend(ierr);
352       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
353                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size]);
354       CeedChkBackend(ierr);
355       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
356                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
357                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
358       break;
359     case CEED_EVAL_GRAD:
360       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
361       CeedChkBackend(ierr);
362       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
363       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
364                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size/dim]);
365       CeedChkBackend(ierr);
366       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
367                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
368                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
369       break;
370     // LCOV_EXCL_START
371     case CEED_EVAL_WEIGHT: {
372       Ceed ceed;
373       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
374       return CeedError(ceed, CEED_ERROR_BACKEND,
375                        "CEED_EVAL_WEIGHT cannot be an output "
376                        "evaluation mode");
377     }
378     case CEED_EVAL_DIV:
379     case CEED_EVAL_CURL: {
380       Ceed ceed;
381       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
382       return CeedError(ceed, CEED_ERROR_BACKEND,
383                        "Ceed evaluation mode not implemented");
384       // LCOV_EXCL_STOP
385     }
386     }
387   }
388   return CEED_ERROR_SUCCESS;
389 }
390 
391 //------------------------------------------------------------------------------
392 // Restore Input Vectors
393 //------------------------------------------------------------------------------
394 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
395     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
396     bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
397     CeedOperator_Blocked *impl) {
398   CeedInt ierr;
399   CeedEvalMode eval_mode;
400 
401   for (CeedInt i=0; i<num_input_fields; i++) {
402     // Skip active inputs
403     if (skip_active) {
404       CeedVector vec;
405       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
406       CeedChkBackend(ierr);
407       if (vec == CEED_VECTOR_ACTIVE)
408         continue;
409     }
410     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
411     CeedChkBackend(ierr);
412     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
413     } else {
414       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
415                                         (const CeedScalar **) &e_data_full[i]);
416       CeedChkBackend(ierr);
417     }
418   }
419   return CEED_ERROR_SUCCESS;
420 }
421 
422 //------------------------------------------------------------------------------
423 // Operator Apply
424 //------------------------------------------------------------------------------
425 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
426                                         CeedVector out_vec,
427                                         CeedRequest *request) {
428   int ierr;
429   CeedOperator_Blocked *impl;
430   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
431   const CeedInt blk_size = 8;
432   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
433   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
434   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
435   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
436   CeedQFunction qf;
437   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
438   CeedOperatorField *op_input_fields, *op_output_fields;
439   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
440                                &num_output_fields, &op_output_fields);
441   CeedChkBackend(ierr);
442   CeedQFunctionField *qf_input_fields, *qf_output_fields;
443   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
444                                 &qf_output_fields);
445   CeedChkBackend(ierr);
446   CeedEvalMode eval_mode;
447   CeedVector vec;
448   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
449 
450   // Setup
451   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
452 
453   // Restriction only operator
454   if (impl->is_identity_restr_op) {
455     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
456                                     impl->e_vecs_full[0], request); CeedChkBackend(ierr);
457     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
458                                     impl->e_vecs_full[0], out_vec, request); CeedChkBackend(ierr);
459     return CEED_ERROR_SUCCESS;
460   }
461 
462   // Input Evecs and Restriction
463   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
464                                          op_input_fields, in_vec, false, e_data_full,
465                                          impl, request); CeedChkBackend(ierr);
466 
467   // Output Evecs
468   for (CeedInt i=0; i<num_output_fields; i++) {
469     ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs],
470                                    CEED_MEM_HOST, &e_data_full[i + num_input_fields]);
471     CeedChkBackend(ierr);
472   }
473 
474   // Loop through elements
475   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
476     // Output pointers
477     for (CeedInt i=0; i<num_output_fields; i++) {
478       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
479       CeedChkBackend(ierr);
480       if (eval_mode == CEED_EVAL_NONE) {
481         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
482         CeedChkBackend(ierr);
483         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
484                                   CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*Q*size]);
485         CeedChkBackend(ierr);
486       }
487     }
488 
489     // Input basis apply
490     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
491                                           num_input_fields, blk_size, false, e_data_full,
492                                           impl); CeedChkBackend(ierr);
493 
494     // Q function
495     if (!impl->is_identity_qf) {
496       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
497       CeedChkBackend(ierr);
498     }
499 
500     // Output basis apply
501     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
502                                            blk_size, num_input_fields,
503                                            num_output_fields, op, e_data_full, impl);
504     CeedChkBackend(ierr);
505   }
506 
507   // Output restriction
508   for (CeedInt i=0; i<num_output_fields; i++) {
509     // Restore evec
510     ierr = CeedVectorRestoreArray(impl->e_vecs_full[i+impl->num_inputs],
511                                   &e_data_full[i + num_input_fields]);
512     CeedChkBackend(ierr);
513     // Get output vector
514     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
515     CeedChkBackend(ierr);
516     // Active
517     if (vec == CEED_VECTOR_ACTIVE)
518       vec = out_vec;
519     // Restrict
520     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_inputs],
521                                     CEED_TRANSPOSE, impl->e_vecs_full[i+impl->num_inputs],
522                                     vec, request); CeedChkBackend(ierr);
523 
524   }
525 
526   // Restore input arrays
527   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
528          op_input_fields, false, e_data_full, impl); CeedChkBackend(ierr);
529 
530   return CEED_ERROR_SUCCESS;
531 }
532 
533 //------------------------------------------------------------------------------
534 // Core code for assembling linear QFunction
535 //------------------------------------------------------------------------------
536 static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(
537   CeedOperator op, bool build_objects, CeedVector *assembled,
538   CeedElemRestriction *rstr, CeedRequest *request) {
539   int ierr;
540   CeedOperator_Blocked *impl;
541   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
542   const CeedInt blk_size = 8;
543   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
544   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
545   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
546   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
547   CeedQFunction qf;
548   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
549   CeedOperatorField *op_input_fields, *op_output_fields;
550   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
551                                &num_output_fields, &op_output_fields);
552   CeedChkBackend(ierr);
553   CeedQFunctionField *qf_input_fields, *qf_output_fields;
554   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
555                                 &qf_output_fields);
556   CeedChkBackend(ierr);
557   CeedVector vec, l_vec = impl->qf_l_vec;
558   CeedInt num_active_in = impl->num_active_in,
559           num_active_out = impl->num_active_out;
560   CeedVector *active_in = impl->qf_active_in;
561   CeedScalar *a, *tmp;
562   Ceed ceed;
563   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
564   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
565 
566   // Setup
567   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
568 
569   // Check for identity
570   if (impl->is_identity_qf)
571     // LCOV_EXCL_START
572     return CeedError(ceed, CEED_ERROR_BACKEND,
573                      "Assembling identity QFunctions not supported");
574   // LCOV_EXCL_STOP
575 
576   // Input Evecs and Restriction
577   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
578                                          op_input_fields, NULL, true, e_data_full,
579                                          impl, request); CeedChkBackend(ierr);
580 
581   // Count number of active input fields
582   if (!num_active_in) {
583     for (CeedInt i=0; i<num_input_fields; i++) {
584       // Get input vector
585       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
586       CeedChkBackend(ierr);
587       // Check if active input
588       if (vec == CEED_VECTOR_ACTIVE) {
589         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
590         CeedChkBackend(ierr);
591         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
592         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
593         CeedChkBackend(ierr);
594         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
595         for (CeedInt field=0; field<size; field++) {
596           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
597           CeedChkBackend(ierr);
598           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
599                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
600           CeedChkBackend(ierr);
601         }
602         num_active_in += size;
603         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
604       }
605     }
606     impl->num_active_in = num_active_in;
607     impl->qf_active_in = active_in;
608   }
609 
610   // Count number of active output fields
611   if (!num_active_out) {
612     for (CeedInt i=0; i<num_output_fields; i++) {
613       // Get output vector
614       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
615       CeedChkBackend(ierr);
616       // Check if active output
617       if (vec == CEED_VECTOR_ACTIVE) {
618         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
619         CeedChkBackend(ierr);
620         num_active_out += size;
621       }
622     }
623     impl->num_active_out = num_active_out;
624   }
625 
626   // Check sizes
627   if (!num_active_in || !num_active_out)
628     // LCOV_EXCL_START
629     return CeedError(ceed, CEED_ERROR_BACKEND,
630                      "Cannot assemble QFunction without active inputs "
631                      "and outputs");
632   // LCOV_EXCL_STOP
633 
634   // Setup Lvec
635   if (!l_vec) {
636     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
637                             &l_vec); CeedChkBackend(ierr);
638     impl->qf_l_vec = l_vec;
639   }
640   ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
641 
642   // Build objects if needed
643   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
644   if (build_objects) {
645     // Create output restriction
646     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
647                                             num_active_in*num_active_out,
648                                             num_active_in*num_active_out*num_elem*Q,
649                                             strides, rstr); CeedChkBackend(ierr);
650     // Create assembled vector
651     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
652                             assembled); CeedChkBackend(ierr);
653   }
654 
655   // Loop through elements
656   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
657     // Input basis apply
658     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
659                                           num_input_fields, blk_size, true, e_data_full,
660                                           impl); CeedChkBackend(ierr);
661 
662     // Assemble QFunction
663     for (CeedInt in=0; in<num_active_in; in++) {
664       // Set Inputs
665       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
666       if (num_active_in > 1) {
667         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
668                                   0.0); CeedChkBackend(ierr);
669       }
670       // Set Outputs
671       for (CeedInt out=0; out<num_output_fields; out++) {
672         // Get output vector
673         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
674         CeedChkBackend(ierr);
675         // Check if active output
676         if (vec == CEED_VECTOR_ACTIVE) {
677           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
678                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
679           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
680           CeedChkBackend(ierr);
681           a += size*Q*blk_size; // Advance the pointer by the size of the output
682         }
683       }
684       // Apply QFunction
685       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
686       CeedChkBackend(ierr);
687     }
688   }
689 
690   // Un-set output Qvecs to prevent accidental overwrite of Assembled
691   for (CeedInt out=0; out<num_output_fields; out++) {
692     // Get output vector
693     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
694     CeedChkBackend(ierr);
695     // Check if active output
696     if (vec == CEED_VECTOR_ACTIVE) {
697       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
698                          NULL); CeedChkBackend(ierr);
699     }
700   }
701 
702   // Restore input arrays
703   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
704          op_input_fields, true, e_data_full, impl); CeedChkBackend(ierr);
705 
706   // Output blocked restriction
707   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
708   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
709   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
710   if (!impl->qf_blk_rstr) {
711     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
712            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
713            strides, &blk_rstr); CeedChkBackend(ierr);
714     impl->qf_blk_rstr = blk_rstr;
715   }
716   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
717                                   request); CeedChkBackend(ierr);
718 
719   return CEED_ERROR_SUCCESS;
720 }
721 
722 //------------------------------------------------------------------------------
723 // Assemble Linear QFunction
724 //------------------------------------------------------------------------------
725 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
726     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
727   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled,
728          rstr, request);
729 }
730 
731 //------------------------------------------------------------------------------
732 // Update Assembled Linear QFunction
733 //------------------------------------------------------------------------------
734 static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op,
735     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
736   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled,
737          &rstr, request);
738 }
739 
740 //------------------------------------------------------------------------------
741 // Operator Destroy
742 //------------------------------------------------------------------------------
743 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
744   int ierr;
745   CeedOperator_Blocked *impl;
746   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
747 
748   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
749     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
750     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
751   }
752   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
753   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
754   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
755 
756   for (CeedInt i=0; i<impl->num_inputs; i++) {
757     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
758     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
759   }
760   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
761   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
762 
763   for (CeedInt i=0; i<impl->num_outputs; i++) {
764     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
765     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
766   }
767   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
768   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
769 
770   // QFunction assembly data
771   for (CeedInt i=0; i<impl->num_active_in; i++) {
772     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
773   }
774   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
775   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
776   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
777 
778   ierr = CeedFree(&impl); CeedChkBackend(ierr);
779   return CEED_ERROR_SUCCESS;
780 }
781 
782 //------------------------------------------------------------------------------
783 // Operator Create
784 //------------------------------------------------------------------------------
785 int CeedOperatorCreate_Blocked(CeedOperator op) {
786   int ierr;
787   Ceed ceed;
788   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
789   CeedOperator_Blocked *impl;
790 
791   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
792   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
793 
794   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
795                                 CeedOperatorLinearAssembleQFunction_Blocked);
796   CeedChkBackend(ierr);
797   ierr = CeedSetBackendFunction(ceed, "Operator", op,
798                                 "LinearAssembleQFunctionUpdate",
799                                 CeedOperatorLinearAssembleQFunctionUpdate_Blocked);
800   CeedChkBackend(ierr);
801   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
802                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
803   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
804                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
805   return CEED_ERROR_SUCCESS;
806 }
807 //------------------------------------------------------------------------------
808