xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision f4f98f9d61969ce0e2a6a2f6adc6df63e4d4c354)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include "ceed-blocked.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28     CeedOperator op, bool is_input, CeedElemRestriction *blk_restr,
29     CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs,
30     CeedInt start_e, CeedInt num_fields, CeedInt Q) {
31   CeedInt ierr, num_comp, size, P;
32   Ceed ceed;
33   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
34   CeedBasis basis;
35   CeedElemRestriction r;
36   CeedOperatorField *op_fields;
37   CeedQFunctionField *qf_fields;
38   if (is_input) {
39     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
40     CeedChkBackend(ierr);
41     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
42     CeedChkBackend(ierr);
43   } else {
44     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields);
45     CeedChkBackend(ierr);
46     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
47     CeedChkBackend(ierr);
48   }
49   const CeedInt blk_size = 8;
50 
51   // Loop over fields
52   for (CeedInt i=0; i<num_fields; i++) {
53     CeedEvalMode eval_mode;
54     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
55     CeedChkBackend(ierr);
56 
57     if (eval_mode != CEED_EVAL_WEIGHT) {
58       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
59       CeedChkBackend(ierr);
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
61       CeedSize l_size;
62       CeedInt num_elem, elem_size, comp_stride;
63       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
64       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
66       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
67 
68       bool strided;
69       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
70       if (strided) {
71         CeedInt strides[3];
72         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
73         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
74                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
75         CeedChkBackend(ierr);
76       } else {
77         const CeedInt *offsets = NULL;
78         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
79         CeedChkBackend(ierr);
80         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
81         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
82                                                 blk_size, num_comp, comp_stride,
83                                                 l_size, CEED_MEM_HOST,
84                                                 CEED_COPY_VALUES, offsets,
85                                                 &blk_restr[i+start_e]);
86         CeedChkBackend(ierr);
87         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
88       }
89       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
90                                              &e_vecs_full[i+start_e]);
91       CeedChkBackend(ierr);
92     }
93 
94     switch(eval_mode) {
95     case CEED_EVAL_NONE:
96       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
98       CeedChkBackend(ierr);
99       break;
100     case CEED_EVAL_INTERP:
101     case CEED_EVAL_GRAD:
102       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
103       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
104       ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr);
105       ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr);
106       ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]);
107       CeedChkBackend(ierr);
108       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
109       CeedChkBackend(ierr);
110       break;
111     case CEED_EVAL_WEIGHT: // Only on input fields
112       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
113       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
114       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
115                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
116       CeedChkBackend(ierr);
117 
118       break;
119     case CEED_EVAL_DIV:
120       break; // Not implemented
121     case CEED_EVAL_CURL:
122       break; // Not implemented
123     }
124   }
125   return CEED_ERROR_SUCCESS;
126 }
127 
128 //------------------------------------------------------------------------------
129 // Setup Operator
130 //------------------------------------------------------------------------------
131 static int CeedOperatorSetup_Blocked(CeedOperator op) {
132   int ierr;
133   bool is_setup_done;
134   ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr);
135   if (is_setup_done) return CEED_ERROR_SUCCESS;
136   Ceed ceed;
137   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
138   CeedOperator_Blocked *impl;
139   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
140   CeedQFunction qf;
141   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
142   CeedInt Q, num_input_fields, num_output_fields;
143   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
144   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
145   CeedOperatorField *op_input_fields, *op_output_fields;
146   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
147                                &num_output_fields, &op_output_fields);
148   CeedChkBackend(ierr);
149   CeedQFunctionField *qf_input_fields, *qf_output_fields;
150   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
151                                 &qf_output_fields);
152   CeedChkBackend(ierr);
153 
154   // Allocate
155   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
156   CeedChkBackend(ierr);
157   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full);
158   CeedChkBackend(ierr);
159 
160   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr);
161   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr);
162   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr);
163   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr);
164   ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr);
165 
166   impl->num_inputs = num_input_fields;
167   impl->num_outputs = num_output_fields;
168 
169   // Set up infield and outfield pointer arrays
170   // Infields
171   ierr = CeedOperatorSetupFields_Blocked(qf, op, true, impl->blk_restr,
172                                          impl->e_vecs_full, impl->e_vecs_in,
173                                          impl->q_vecs_in, 0,
174                                          num_input_fields, Q);
175   CeedChkBackend(ierr);
176   // Outfields
177   ierr = CeedOperatorSetupFields_Blocked(qf, op, false, impl->blk_restr,
178                                          impl->e_vecs_full, impl->e_vecs_out,
179                                          impl->q_vecs_out, num_input_fields,
180                                          num_output_fields, Q);
181   CeedChkBackend(ierr);
182 
183   // Identity QFunctions
184   if (impl->is_identity_qf) {
185     CeedEvalMode in_mode, out_mode;
186     CeedQFunctionField *in_fields, *out_fields;
187     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
188     CeedChkBackend(ierr);
189     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
190     CeedChkBackend(ierr);
191     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
192     CeedChkBackend(ierr);
193 
194     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
195       impl->is_identity_restr_op = true;
196     } else {
197       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
198       impl->q_vecs_out[0] = impl->q_vecs_in[0];
199       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
200     }
201   }
202 
203   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
204 
205   return CEED_ERROR_SUCCESS;
206 }
207 
208 //------------------------------------------------------------------------------
209 // Setup Operator Inputs
210 //------------------------------------------------------------------------------
211 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
212     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
213     CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
214     CeedOperator_Blocked *impl, CeedRequest *request) {
215   CeedInt ierr;
216   CeedEvalMode eval_mode;
217   CeedVector vec;
218   uint64_t state;
219 
220   for (CeedInt i=0; i<num_input_fields; i++) {
221     // Get input vector
222     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
223     CeedChkBackend(ierr);
224     if (vec == CEED_VECTOR_ACTIVE) {
225       if (skip_active)
226         continue;
227       else
228         vec = in_vec;
229     }
230 
231     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
232     CeedChkBackend(ierr);
233     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
234     } else {
235       // Restrict
236       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
237       if (state != impl->input_states[i] || vec == in_vec) {
238         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
239                                         vec, impl->e_vecs_full[i], request);
240         CeedChkBackend(ierr);
241         impl->input_states[i] = state;
242       }
243       // Get evec
244       ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST,
245                                     (const CeedScalar **) &e_data_full[i]);
246       CeedChkBackend(ierr);
247     }
248   }
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 //------------------------------------------------------------------------------
253 // Input Basis Action
254 //------------------------------------------------------------------------------
255 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
256     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
257     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
258     CeedScalar *e_data_full[2*CEED_FIELD_MAX], CeedOperator_Blocked *impl) {
259   CeedInt ierr;
260   CeedInt dim, elem_size, size;
261   CeedElemRestriction elem_restr;
262   CeedEvalMode eval_mode;
263   CeedBasis basis;
264 
265   for (CeedInt i=0; i<num_input_fields; i++) {
266     // Skip active input
267     if (skip_active) {
268       CeedVector vec;
269       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
270       CeedChkBackend(ierr);
271       if (vec == CEED_VECTOR_ACTIVE)
272         continue;
273     }
274 
275     // Get elem_size, eval_mode, size
276     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
277     CeedChkBackend(ierr);
278     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
279     CeedChkBackend(ierr);
280     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
281     CeedChkBackend(ierr);
282     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
283     CeedChkBackend(ierr);
284     // Basis action
285     switch(eval_mode) {
286     case CEED_EVAL_NONE:
287       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
288                                 CEED_USE_POINTER, &e_data_full[i][e*Q*size]);
289       CeedChkBackend(ierr);
290       break;
291     case CEED_EVAL_INTERP:
292       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
293       CeedChkBackend(ierr);
294       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
295                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size]);
296       CeedChkBackend(ierr);
297       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
298                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
299                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
300       break;
301     case CEED_EVAL_GRAD:
302       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
303       CeedChkBackend(ierr);
304       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
305       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
306                                 CEED_USE_POINTER, &e_data_full[i][e*elem_size*size/dim]);
307       CeedChkBackend(ierr);
308       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
309                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
310                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
311       break;
312     case CEED_EVAL_WEIGHT:
313       break;  // No action
314     // LCOV_EXCL_START
315     case CEED_EVAL_DIV:
316     case CEED_EVAL_CURL: {
317       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
318       CeedChkBackend(ierr);
319       Ceed ceed;
320       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
321       return CeedError(ceed, CEED_ERROR_BACKEND,
322                        "Ceed evaluation mode not implemented");
323       // LCOV_EXCL_STOP
324     }
325     }
326   }
327   return CEED_ERROR_SUCCESS;
328 }
329 
330 //------------------------------------------------------------------------------
331 // Output Basis Action
332 //------------------------------------------------------------------------------
333 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
334     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
335     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
336     CeedOperator op, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
337     CeedOperator_Blocked *impl) {
338   CeedInt ierr;
339   CeedInt dim, elem_size, size;
340   CeedElemRestriction elem_restr;
341   CeedEvalMode eval_mode;
342   CeedBasis basis;
343 
344   for (CeedInt i=0; i<num_output_fields; i++) {
345     // Get elem_size, eval_mode, size
346     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
347     CeedChkBackend(ierr);
348     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
349     CeedChkBackend(ierr);
350     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
351     CeedChkBackend(ierr);
352     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
353     CeedChkBackend(ierr);
354     // Basis action
355     switch(eval_mode) {
356     case CEED_EVAL_NONE:
357       break; // No action
358     case CEED_EVAL_INTERP:
359       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
360       CeedChkBackend(ierr);
361       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
362                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size]);
363       CeedChkBackend(ierr);
364       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
365                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
366                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
367       break;
368     case CEED_EVAL_GRAD:
369       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
370       CeedChkBackend(ierr);
371       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
372       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
373                                 CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size/dim]);
374       CeedChkBackend(ierr);
375       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
376                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
377                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
378       break;
379     // LCOV_EXCL_START
380     case CEED_EVAL_WEIGHT: {
381       Ceed ceed;
382       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
383       return CeedError(ceed, CEED_ERROR_BACKEND,
384                        "CEED_EVAL_WEIGHT cannot be an output "
385                        "evaluation mode");
386     }
387     case CEED_EVAL_DIV:
388     case CEED_EVAL_CURL: {
389       Ceed ceed;
390       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
391       return CeedError(ceed, CEED_ERROR_BACKEND,
392                        "Ceed evaluation mode not implemented");
393       // LCOV_EXCL_STOP
394     }
395     }
396   }
397   return CEED_ERROR_SUCCESS;
398 }
399 
400 //------------------------------------------------------------------------------
401 // Restore Input Vectors
402 //------------------------------------------------------------------------------
403 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
404     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
405     bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX],
406     CeedOperator_Blocked *impl) {
407   CeedInt ierr;
408   CeedEvalMode eval_mode;
409 
410   for (CeedInt i=0; i<num_input_fields; i++) {
411     // Skip active inputs
412     if (skip_active) {
413       CeedVector vec;
414       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
415       CeedChkBackend(ierr);
416       if (vec == CEED_VECTOR_ACTIVE)
417         continue;
418     }
419     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
420     CeedChkBackend(ierr);
421     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
422     } else {
423       ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i],
424                                         (const CeedScalar **) &e_data_full[i]);
425       CeedChkBackend(ierr);
426     }
427   }
428   return CEED_ERROR_SUCCESS;
429 }
430 
431 //------------------------------------------------------------------------------
432 // Operator Apply
433 //------------------------------------------------------------------------------
434 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
435                                         CeedVector out_vec,
436                                         CeedRequest *request) {
437   int ierr;
438   CeedOperator_Blocked *impl;
439   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
440   const CeedInt blk_size = 8;
441   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
442   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
443   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
444   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
445   CeedQFunction qf;
446   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
447   CeedOperatorField *op_input_fields, *op_output_fields;
448   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
449                                &num_output_fields, &op_output_fields);
450   CeedChkBackend(ierr);
451   CeedQFunctionField *qf_input_fields, *qf_output_fields;
452   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
453                                 &qf_output_fields);
454   CeedChkBackend(ierr);
455   CeedEvalMode eval_mode;
456   CeedVector vec;
457   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
458 
459   // Setup
460   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
461 
462   // Restriction only operator
463   if (impl->is_identity_restr_op) {
464     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
465                                     impl->e_vecs_full[0], request); CeedChkBackend(ierr);
466     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
467                                     impl->e_vecs_full[0], out_vec, request); CeedChkBackend(ierr);
468     return CEED_ERROR_SUCCESS;
469   }
470 
471   // Input Evecs and Restriction
472   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
473                                          op_input_fields, in_vec, false, e_data_full,
474                                          impl, request); CeedChkBackend(ierr);
475 
476   // Output Evecs
477   for (CeedInt i=0; i<num_output_fields; i++) {
478     ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs],
479                                    CEED_MEM_HOST, &e_data_full[i + num_input_fields]);
480     CeedChkBackend(ierr);
481   }
482 
483   // Loop through elements
484   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
485     // Output pointers
486     for (CeedInt i=0; i<num_output_fields; i++) {
487       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
488       CeedChkBackend(ierr);
489       if (eval_mode == CEED_EVAL_NONE) {
490         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
491         CeedChkBackend(ierr);
492         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
493                                   CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*Q*size]);
494         CeedChkBackend(ierr);
495       }
496     }
497 
498     // Input basis apply
499     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
500                                           num_input_fields, blk_size, false, e_data_full,
501                                           impl); CeedChkBackend(ierr);
502 
503     // Q function
504     if (!impl->is_identity_qf) {
505       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
506       CeedChkBackend(ierr);
507     }
508 
509     // Output basis apply
510     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
511                                            blk_size, num_input_fields,
512                                            num_output_fields, op, e_data_full, impl);
513     CeedChkBackend(ierr);
514   }
515 
516   // Output restriction
517   for (CeedInt i=0; i<num_output_fields; i++) {
518     // Restore evec
519     ierr = CeedVectorRestoreArray(impl->e_vecs_full[i+impl->num_inputs],
520                                   &e_data_full[i + num_input_fields]);
521     CeedChkBackend(ierr);
522     // Get output vector
523     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
524     CeedChkBackend(ierr);
525     // Active
526     if (vec == CEED_VECTOR_ACTIVE)
527       vec = out_vec;
528     // Restrict
529     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_inputs],
530                                     CEED_TRANSPOSE, impl->e_vecs_full[i+impl->num_inputs],
531                                     vec, request); CeedChkBackend(ierr);
532 
533   }
534 
535   // Restore input arrays
536   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
537          op_input_fields, false, e_data_full, impl); CeedChkBackend(ierr);
538 
539   return CEED_ERROR_SUCCESS;
540 }
541 
542 //------------------------------------------------------------------------------
543 // Core code for assembling linear QFunction
544 //------------------------------------------------------------------------------
545 static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(
546   CeedOperator op, bool build_objects, CeedVector *assembled,
547   CeedElemRestriction *rstr, CeedRequest *request) {
548   int ierr;
549   CeedOperator_Blocked *impl;
550   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
551   const CeedInt blk_size = 8;
552   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
553   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
554   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
555   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
556   CeedQFunction qf;
557   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
558   CeedOperatorField *op_input_fields, *op_output_fields;
559   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
560                                &num_output_fields, &op_output_fields);
561   CeedChkBackend(ierr);
562   CeedQFunctionField *qf_input_fields, *qf_output_fields;
563   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
564                                 &qf_output_fields);
565   CeedChkBackend(ierr);
566   CeedVector vec, l_vec = impl->qf_l_vec;
567   CeedInt num_active_in = impl->num_active_in,
568           num_active_out = impl->num_active_out;
569   CeedVector *active_in = impl->qf_active_in;
570   CeedScalar *a, *tmp;
571   Ceed ceed;
572   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
573   CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0};
574 
575   // Setup
576   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
577 
578   // Check for identity
579   if (impl->is_identity_qf)
580     // LCOV_EXCL_START
581     return CeedError(ceed, CEED_ERROR_BACKEND,
582                      "Assembling identity QFunctions not supported");
583   // LCOV_EXCL_STOP
584 
585   // Input Evecs and Restriction
586   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
587                                          op_input_fields, NULL, true, e_data_full,
588                                          impl, request); CeedChkBackend(ierr);
589 
590   // Count number of active input fields
591   if (!num_active_in) {
592     for (CeedInt i=0; i<num_input_fields; i++) {
593       // Get input vector
594       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
595       CeedChkBackend(ierr);
596       // Check if active input
597       if (vec == CEED_VECTOR_ACTIVE) {
598         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
599         CeedChkBackend(ierr);
600         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
601         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
602         CeedChkBackend(ierr);
603         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
604         for (CeedInt field=0; field<size; field++) {
605           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
606           CeedChkBackend(ierr);
607           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
608                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
609           CeedChkBackend(ierr);
610         }
611         num_active_in += size;
612         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
613       }
614     }
615     impl->num_active_in = num_active_in;
616     impl->qf_active_in = active_in;
617   }
618 
619   // Count number of active output fields
620   if (!num_active_out) {
621     for (CeedInt i=0; i<num_output_fields; i++) {
622       // Get output vector
623       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
624       CeedChkBackend(ierr);
625       // Check if active output
626       if (vec == CEED_VECTOR_ACTIVE) {
627         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
628         CeedChkBackend(ierr);
629         num_active_out += size;
630       }
631     }
632     impl->num_active_out = num_active_out;
633   }
634 
635   // Check sizes
636   if (!num_active_in || !num_active_out)
637     // LCOV_EXCL_START
638     return CeedError(ceed, CEED_ERROR_BACKEND,
639                      "Cannot assemble QFunction without active inputs "
640                      "and outputs");
641   // LCOV_EXCL_STOP
642 
643   // Setup Lvec
644   if (!l_vec) {
645     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
646                             &l_vec); CeedChkBackend(ierr);
647     impl->qf_l_vec = l_vec;
648   }
649   ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
650 
651   // Build objects if needed
652   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
653   if (build_objects) {
654     // Create output restriction
655     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
656                                             num_active_in*num_active_out,
657                                             num_active_in*num_active_out*num_elem*Q,
658                                             strides, rstr); CeedChkBackend(ierr);
659     // Create assembled vector
660     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
661                             assembled); CeedChkBackend(ierr);
662   }
663 
664   // Loop through elements
665   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
666     // Input basis apply
667     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
668                                           num_input_fields, blk_size, true, e_data_full,
669                                           impl); CeedChkBackend(ierr);
670 
671     // Assemble QFunction
672     for (CeedInt in=0; in<num_active_in; in++) {
673       // Set Inputs
674       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
675       if (num_active_in > 1) {
676         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
677                                   0.0); CeedChkBackend(ierr);
678       }
679       // Set Outputs
680       for (CeedInt out=0; out<num_output_fields; out++) {
681         // Get output vector
682         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
683         CeedChkBackend(ierr);
684         // Check if active output
685         if (vec == CEED_VECTOR_ACTIVE) {
686           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
687                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
688           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
689           CeedChkBackend(ierr);
690           a += size*Q*blk_size; // Advance the pointer by the size of the output
691         }
692       }
693       // Apply QFunction
694       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
695       CeedChkBackend(ierr);
696     }
697   }
698 
699   // Un-set output Qvecs to prevent accidental overwrite of Assembled
700   for (CeedInt out=0; out<num_output_fields; out++) {
701     // Get output vector
702     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
703     CeedChkBackend(ierr);
704     // Check if active output
705     if (vec == CEED_VECTOR_ACTIVE) {
706       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
707                          NULL); CeedChkBackend(ierr);
708     }
709   }
710 
711   // Restore input arrays
712   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
713          op_input_fields, true, e_data_full, impl); CeedChkBackend(ierr);
714 
715   // Output blocked restriction
716   ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr);
717   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
718   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
719   if (!impl->qf_blk_rstr) {
720     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
721            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
722            strides, &blk_rstr); CeedChkBackend(ierr);
723     impl->qf_blk_rstr = blk_rstr;
724   }
725   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled,
726                                   request); CeedChkBackend(ierr);
727 
728   return CEED_ERROR_SUCCESS;
729 }
730 
731 //------------------------------------------------------------------------------
732 // Assemble Linear QFunction
733 //------------------------------------------------------------------------------
734 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
735     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
736   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled,
737          rstr, request);
738 }
739 
740 //------------------------------------------------------------------------------
741 // Update Assembled Linear QFunction
742 //------------------------------------------------------------------------------
743 static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op,
744     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
745   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled,
746          &rstr, request);
747 }
748 
749 //------------------------------------------------------------------------------
750 // Operator Destroy
751 //------------------------------------------------------------------------------
752 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
753   int ierr;
754   CeedOperator_Blocked *impl;
755   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
756 
757   for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) {
758     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
759     ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr);
760   }
761   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
762   ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr);
763   ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr);
764 
765   for (CeedInt i=0; i<impl->num_inputs; i++) {
766     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
767     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
768   }
769   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
770   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
771 
772   for (CeedInt i=0; i<impl->num_outputs; i++) {
773     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
774     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
775   }
776   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
777   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
778 
779   // QFunction assembly data
780   for (CeedInt i=0; i<impl->num_active_in; i++) {
781     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
782   }
783   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
784   ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr);
785   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
786 
787   ierr = CeedFree(&impl); CeedChkBackend(ierr);
788   return CEED_ERROR_SUCCESS;
789 }
790 
791 //------------------------------------------------------------------------------
792 // Operator Create
793 //------------------------------------------------------------------------------
794 int CeedOperatorCreate_Blocked(CeedOperator op) {
795   int ierr;
796   Ceed ceed;
797   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
798   CeedOperator_Blocked *impl;
799 
800   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
801   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
802 
803   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
804                                 CeedOperatorLinearAssembleQFunction_Blocked);
805   CeedChkBackend(ierr);
806   ierr = CeedSetBackendFunction(ceed, "Operator", op,
807                                 "LinearAssembleQFunctionUpdate",
808                                 CeedOperatorLinearAssembleQFunctionUpdate_Blocked);
809   CeedChkBackend(ierr);
810   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
811                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
812   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
813                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
814   return CEED_ERROR_SUCCESS;
815 }
816 //------------------------------------------------------------------------------
817