xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 70a7ffb32f2ac8f6de27898459ba6016395fdb67)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include "ceed-blocked.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28     CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr,
29     CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs,
30     CeedInt start_e, CeedInt numfields, CeedInt Q) {
31   CeedInt dim, ierr, num_comp, size, P;
32   Ceed ceed;
33   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
34   CeedBasis basis;
35   CeedElemRestriction r;
36   CeedOperatorField *op_fields;
37   CeedQFunctionField *qf_fields;
38   if (inOrOut) {
39     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields);
40     CeedChkBackend(ierr);
41     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
42     CeedChkBackend(ierr);
43   } else {
44     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
45     CeedChkBackend(ierr);
46     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
47     CeedChkBackend(ierr);
48   }
49   const CeedInt blk_size = 8;
50 
51   // Loop over fields
52   for (CeedInt i=0; i<numfields; i++) {
53     CeedEvalMode eval_mode;
54     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
55     CeedChkBackend(ierr);
56 
57     if (eval_mode != CEED_EVAL_WEIGHT) {
58       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
59       CeedChkBackend(ierr);
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
61       CeedInt num_elem, elem_size, l_size, comp_stride;
62       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
63       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
64       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
66 
67       bool strided;
68       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
69       if (strided) {
70         CeedInt strides[3];
71         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
72         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
73                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
74         CeedChkBackend(ierr);
75       } else {
76         const CeedInt *offsets = NULL;
77         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
78         CeedChkBackend(ierr);
79         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
80         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
81                                                 blk_size, num_comp, comp_stride,
82                                                 l_size, CEED_MEM_HOST,
83                                                 CEED_COPY_VALUES, offsets,
84                                                 &blk_restr[i+start_e]);
85         CeedChkBackend(ierr);
86         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
87       }
88       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
89                                              &full_evecs[i+start_e]);
90       CeedChkBackend(ierr);
91     }
92 
93     switch(eval_mode) {
94     case CEED_EVAL_NONE:
95       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
96       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
97       CeedChkBackend(ierr);
98       break;
99     case CEED_EVAL_INTERP:
100       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
101       ierr = CeedElemRestrictionGetElementSize(r, &P);
102       CeedChkBackend(ierr);
103       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
104       CeedChkBackend(ierr);
105       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
106       CeedChkBackend(ierr);
107       break;
108     case CEED_EVAL_GRAD:
109       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
110       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
111       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
112       ierr = CeedElemRestrictionGetElementSize(r, &P);
113       CeedChkBackend(ierr);
114       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
115       CeedChkBackend(ierr);
116       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
117       CeedChkBackend(ierr);
118       break;
119     case CEED_EVAL_WEIGHT: // Only on input fields
120       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
121       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
122       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
123                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
124       CeedChkBackend(ierr);
125 
126       break;
127     case CEED_EVAL_DIV:
128       break; // Not implemented
129     case CEED_EVAL_CURL:
130       break; // Not implemented
131     }
132   }
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 //------------------------------------------------------------------------------
137 // Setup Operator
138 //------------------------------------------------------------------------------
139 static int CeedOperatorSetup_Blocked(CeedOperator op) {
140   int ierr;
141   bool setup_done;
142   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
143   if (setup_done) return CEED_ERROR_SUCCESS;
144   Ceed ceed;
145   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
146   CeedOperator_Blocked *impl;
147   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
148   CeedQFunction qf;
149   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
150   CeedInt Q, num_input_fields, num_output_fields;
151   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
152   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
153   CeedOperatorField *op_input_fields, *op_output_fields;
154   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
155                                &num_output_fields, &op_output_fields);
156   CeedChkBackend(ierr);
157   CeedQFunctionField *qf_input_fields, *qf_output_fields;
158   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
159                                 &qf_output_fields);
160   CeedChkBackend(ierr);
161 
162   // Allocate
163   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
164   CeedChkBackend(ierr);
165   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
166   CeedChkBackend(ierr);
167   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
168   CeedChkBackend(ierr);
169 
170   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
171   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
172   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
173   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
174   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
175 
176   impl->num_e_vecs_in = num_input_fields;
177   impl->num_e_vecs_out = num_output_fields;
178 
179   // Set up infield and outfield pointer arrays
180   // Infields
181   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr,
182                                          impl->e_vecs, impl->e_vecs_in,
183                                          impl->q_vecs_in, 0,
184                                          num_input_fields, Q);
185   CeedChkBackend(ierr);
186   // Outfields
187   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr,
188                                          impl->e_vecs, impl->e_vecs_out,
189                                          impl->q_vecs_out, num_input_fields,
190                                          num_output_fields, Q);
191   CeedChkBackend(ierr);
192 
193   // Identity QFunctions
194   if (impl->is_identity_qf) {
195     CeedEvalMode in_mode, out_mode;
196     CeedQFunctionField *in_fields, *out_fields;
197     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
198     CeedChkBackend(ierr);
199     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
200     CeedChkBackend(ierr);
201     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
202     CeedChkBackend(ierr);
203 
204     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
205       impl->is_identity_restr_op = true;
206     } else {
207       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
208       impl->q_vecs_out[0] = impl->q_vecs_in[0];
209       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
210     }
211   }
212 
213   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
214 
215   return CEED_ERROR_SUCCESS;
216 }
217 
218 //------------------------------------------------------------------------------
219 // Setup Operator Inputs
220 //------------------------------------------------------------------------------
221 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
222     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
223     CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl,
224     CeedRequest *request) {
225   CeedInt ierr;
226   CeedEvalMode eval_mode;
227   CeedVector vec;
228   uint64_t state;
229 
230   for (CeedInt i=0; i<num_input_fields; i++) {
231     // Get input vector
232     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
233     CeedChkBackend(ierr);
234     if (vec == CEED_VECTOR_ACTIVE) {
235       if (skip_active)
236         continue;
237       else
238         vec = in_vec;
239     }
240 
241     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
242     CeedChkBackend(ierr);
243     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
244     } else {
245       // Restrict
246       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
247       if (state != impl->input_state[i] || vec == in_vec) {
248         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
249                                         vec, impl->e_vecs[i], request);
250         CeedChkBackend(ierr);
251         impl->input_state[i] = state;
252       }
253       // Get evec
254       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
255                                     (const CeedScalar **) &impl->e_data[i]);
256       CeedChkBackend(ierr);
257     }
258   }
259   return CEED_ERROR_SUCCESS;
260 }
261 
262 //------------------------------------------------------------------------------
263 // Input Basis Action
264 //------------------------------------------------------------------------------
265 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
266     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
267     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
268     CeedOperator_Blocked *impl) {
269   CeedInt ierr;
270   CeedInt dim, elem_size, size;
271   CeedElemRestriction elem_restr;
272   CeedEvalMode eval_mode;
273   CeedBasis basis;
274 
275   for (CeedInt i=0; i<num_input_fields; i++) {
276     // Skip active input
277     if (skip_active) {
278       CeedVector vec;
279       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
280       CeedChkBackend(ierr);
281       if (vec == CEED_VECTOR_ACTIVE)
282         continue;
283     }
284 
285     // Get elem_size, eval_mode, size
286     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
287     CeedChkBackend(ierr);
288     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
289     CeedChkBackend(ierr);
290     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
291     CeedChkBackend(ierr);
292     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
293     CeedChkBackend(ierr);
294     // Basis action
295     switch(eval_mode) {
296     case CEED_EVAL_NONE:
297       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
298                                 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
299       break;
300     case CEED_EVAL_INTERP:
301       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
302       CeedChkBackend(ierr);
303       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
304                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]);
305       CeedChkBackend(ierr);
306       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
307                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
308                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
309       break;
310     case CEED_EVAL_GRAD:
311       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
312       CeedChkBackend(ierr);
313       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
314       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
315                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]);
316       CeedChkBackend(ierr);
317       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
318                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
319                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
320       break;
321     case CEED_EVAL_WEIGHT:
322       break;  // No action
323     // LCOV_EXCL_START
324     case CEED_EVAL_DIV:
325     case CEED_EVAL_CURL: {
326       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
327       CeedChkBackend(ierr);
328       Ceed ceed;
329       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
330       return CeedError(ceed, CEED_ERROR_BACKEND,
331                        "Ceed evaluation mode not implemented");
332       // LCOV_EXCL_STOP
333     }
334     }
335   }
336   return CEED_ERROR_SUCCESS;
337 }
338 
339 //------------------------------------------------------------------------------
340 // Output Basis Action
341 //------------------------------------------------------------------------------
342 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
343     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
344     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
345     CeedOperator op, CeedOperator_Blocked *impl) {
346   CeedInt ierr;
347   CeedInt dim, elem_size, size;
348   CeedElemRestriction elem_restr;
349   CeedEvalMode eval_mode;
350   CeedBasis basis;
351 
352   for (CeedInt i=0; i<num_output_fields; i++) {
353     // Get elem_size, eval_mode, size
354     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
355     CeedChkBackend(ierr);
356     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
357     CeedChkBackend(ierr);
358     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
359     CeedChkBackend(ierr);
360     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
361     CeedChkBackend(ierr);
362     // Basis action
363     switch(eval_mode) {
364     case CEED_EVAL_NONE:
365       break; // No action
366     case CEED_EVAL_INTERP:
367       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
368       CeedChkBackend(ierr);
369       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
370                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]);
371       CeedChkBackend(ierr);
372       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
373                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
374                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
375       break;
376     case CEED_EVAL_GRAD:
377       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
378       CeedChkBackend(ierr);
379       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
380       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
381                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]);
382       CeedChkBackend(ierr);
383       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
384                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
385                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
386       break;
387     // LCOV_EXCL_START
388     case CEED_EVAL_WEIGHT: {
389       Ceed ceed;
390       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
391       return CeedError(ceed, CEED_ERROR_BACKEND,
392                        "CEED_EVAL_WEIGHT cannot be an output "
393                        "evaluation mode");
394     }
395     case CEED_EVAL_DIV:
396     case CEED_EVAL_CURL: {
397       Ceed ceed;
398       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
399       return CeedError(ceed, CEED_ERROR_BACKEND,
400                        "Ceed evaluation mode not implemented");
401       // LCOV_EXCL_STOP
402     }
403     }
404   }
405   return CEED_ERROR_SUCCESS;
406 }
407 
408 //------------------------------------------------------------------------------
409 // Restore Input Vectors
410 //------------------------------------------------------------------------------
411 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
412     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
413     bool skip_active, CeedOperator_Blocked *impl) {
414   CeedInt ierr;
415   CeedEvalMode eval_mode;
416 
417   for (CeedInt i=0; i<num_input_fields; i++) {
418     // Skip active inputs
419     if (skip_active) {
420       CeedVector vec;
421       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
422       CeedChkBackend(ierr);
423       if (vec == CEED_VECTOR_ACTIVE)
424         continue;
425     }
426     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
427     CeedChkBackend(ierr);
428     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
429     } else {
430       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
431                                         (const CeedScalar **) &impl->e_data[i]);
432       CeedChkBackend(ierr);
433     }
434   }
435   return CEED_ERROR_SUCCESS;
436 }
437 
438 //------------------------------------------------------------------------------
439 // Operator Apply
440 //------------------------------------------------------------------------------
441 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
442                                         CeedVector out_vec,
443                                         CeedRequest *request) {
444   int ierr;
445   CeedOperator_Blocked *impl;
446   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
447   const CeedInt blk_size = 8;
448   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
449   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
450   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
451   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
452   CeedQFunction qf;
453   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
454   CeedOperatorField *op_input_fields, *op_output_fields;
455   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
456                                &num_output_fields, &op_output_fields);
457   CeedChkBackend(ierr);
458   CeedQFunctionField *qf_input_fields, *qf_output_fields;
459   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
460                                 &qf_output_fields);
461   CeedChkBackend(ierr);
462   CeedEvalMode eval_mode;
463   CeedVector vec;
464 
465   // Setup
466   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
467 
468   // Restriction only operator
469   if (impl->is_identity_restr_op) {
470     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
471                                     impl->e_vecs[0], request); CeedChkBackend(ierr);
472     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
473                                     impl->e_vecs[0], out_vec, request); CeedChkBackend(ierr);
474     return CEED_ERROR_SUCCESS;
475   }
476 
477   // Input Evecs and Restriction
478   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
479                                          op_input_fields, in_vec, false, impl,
480                                          request); CeedChkBackend(ierr);
481 
482   // Output Evecs
483   for (CeedInt i=0; i<num_output_fields; i++) {
484     ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST,
485                               &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
486   }
487 
488   // Loop through elements
489   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
490     // Output pointers
491     for (CeedInt i=0; i<num_output_fields; i++) {
492       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
493       CeedChkBackend(ierr);
494       if (eval_mode == CEED_EVAL_NONE) {
495         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
496         CeedChkBackend(ierr);
497         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
498                                   CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]);
499         CeedChkBackend(ierr);
500       }
501     }
502 
503     // Input basis apply
504     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
505                                           num_input_fields, blk_size, false, impl);
506     CeedChkBackend(ierr);
507 
508     // Q function
509     if (!impl->is_identity_qf) {
510       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
511       CeedChkBackend(ierr);
512     }
513 
514     // Output basis apply
515     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
516                                            blk_size, num_input_fields,
517                                            num_output_fields, op, impl);
518     CeedChkBackend(ierr);
519   }
520 
521   // Output restriction
522   for (CeedInt i=0; i<num_output_fields; i++) {
523     // Restore evec
524     ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in],
525                                   &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
526     // Get output vector
527     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
528     CeedChkBackend(ierr);
529     // Active
530     if (vec == CEED_VECTOR_ACTIVE)
531       vec = out_vec;
532     // Restrict
533     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in],
534                                     CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in],
535                                     vec, request); CeedChkBackend(ierr);
536 
537   }
538 
539   // Restore input arrays
540   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
541          op_input_fields, false, impl); CeedChkBackend(ierr);
542 
543   return CEED_ERROR_SUCCESS;
544 }
545 
546 //------------------------------------------------------------------------------
547 // Core code for assembling linear QFunction
548 //------------------------------------------------------------------------------
549 static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(
550   CeedOperator op,
551   bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
552   CeedRequest *request) {
553   int ierr;
554   CeedOperator_Blocked *impl;
555   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
556   const CeedInt blk_size = 8;
557   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
558   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
559   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
560   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
561   CeedQFunction qf;
562   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
563   CeedOperatorField *op_input_fields, *op_output_fields;
564   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
565                                &num_output_fields, &op_output_fields);
566   CeedChkBackend(ierr);
567   CeedQFunctionField *qf_input_fields, *qf_output_fields;
568   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
569                                 &qf_output_fields);
570   CeedChkBackend(ierr);
571   CeedVector vec, lvec;
572   CeedInt num_active_in = 0, num_active_out = 0;
573   CeedVector *active_in = NULL;
574   CeedScalar *a, *tmp;
575   Ceed ceed;
576   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
577 
578   // Setup
579   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
580 
581   // Check for identity
582   if (impl->is_identity_qf)
583     // LCOV_EXCL_START
584     return CeedError(ceed, CEED_ERROR_BACKEND,
585                      "Assembling identity QFunctions not supported");
586   // LCOV_EXCL_STOP
587 
588   // Input Evecs and Restriction
589   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
590                                          op_input_fields, NULL, true, impl,
591                                          request); CeedChkBackend(ierr);
592 
593   // Count number of active input fields
594   for (CeedInt i=0; i<num_input_fields; i++) {
595     // Get input vector
596     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
597     CeedChkBackend(ierr);
598     // Check if active input
599     if (vec == CEED_VECTOR_ACTIVE) {
600       ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
601       CeedChkBackend(ierr);
602       ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
603       ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
604       CeedChkBackend(ierr);
605       ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
606       for (CeedInt field=0; field<size; field++) {
607         ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
608         CeedChkBackend(ierr);
609         ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
610                                   CEED_USE_POINTER, &tmp[field*Q*blk_size]);
611         CeedChkBackend(ierr);
612       }
613       num_active_in += size;
614       ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
615     }
616   }
617 
618   // Count number of active output fields
619   for (CeedInt i=0; i<num_output_fields; i++) {
620     // Get output vector
621     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
622     CeedChkBackend(ierr);
623     // Check if active output
624     if (vec == CEED_VECTOR_ACTIVE) {
625       ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
626       CeedChkBackend(ierr);
627       num_active_out += size;
628     }
629   }
630 
631   // Check sizes
632   if (!num_active_in || !num_active_out)
633     // LCOV_EXCL_START
634     return CeedError(ceed, CEED_ERROR_BACKEND,
635                      "Cannot assemble QFunction without active inputs "
636                      "and outputs");
637   // LCOV_EXCL_STOP
638 
639   // Setup Lvec
640   ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
641                           &lvec); CeedChkBackend(ierr);
642   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
643 
644   // Build objects if needed
645   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
646   if (build_objects) {
647     // Create output restriction
648     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
649                                             num_active_in*num_active_out,
650                                             num_active_in*num_active_out*num_elem*Q,
651                                             strides, rstr); CeedChkBackend(ierr);
652     // Create assembled vector
653     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
654                             assembled); CeedChkBackend(ierr);
655   }
656 
657   // Loop through elements
658   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
659     // Input basis apply
660     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
661                                           num_input_fields, blk_size, true, impl);
662     CeedChkBackend(ierr);
663 
664     // Assemble QFunction
665     for (CeedInt in=0; in<num_active_in; in++) {
666       // Set Inputs
667       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
668       if (num_active_in > 1) {
669         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
670                                   0.0); CeedChkBackend(ierr);
671       }
672       // Set Outputs
673       for (CeedInt out=0; out<num_output_fields; out++) {
674         // Get output vector
675         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
676         CeedChkBackend(ierr);
677         // Check if active output
678         if (vec == CEED_VECTOR_ACTIVE) {
679           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
680                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
681           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
682           CeedChkBackend(ierr);
683           a += size*Q*blk_size; // Advance the pointer by the size of the output
684         }
685       }
686       // Apply QFunction
687       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
688       CeedChkBackend(ierr);
689     }
690   }
691 
692   // Un-set output Qvecs to prevent accidental overwrite of Assembled
693   for (CeedInt out=0; out<num_output_fields; out++) {
694     // Get output vector
695     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
696     CeedChkBackend(ierr);
697     // Check if active output
698     if (vec == CEED_VECTOR_ACTIVE) {
699       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
700                          NULL); CeedChkBackend(ierr);
701     }
702   }
703 
704   // Restore input arrays
705   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
706          op_input_fields, true, impl); CeedChkBackend(ierr);
707 
708   // Output blocked restriction
709   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
710   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
711   CeedElemRestriction blk_rstr;
712   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
713          num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
714          strides, &blk_rstr); CeedChkBackend(ierr);
715   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
716                                   request); CeedChkBackend(ierr);
717 
718   // Cleanup
719   for (CeedInt i=0; i<num_active_in; i++) {
720     ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr);
721   }
722   ierr = CeedFree(&active_in); CeedChkBackend(ierr);
723   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
724   ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr);
725 
726   return CEED_ERROR_SUCCESS;
727 }
728 
729 //------------------------------------------------------------------------------
730 // Assemble Linear QFunction
731 //------------------------------------------------------------------------------
732 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
733     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
734   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled,
735          rstr, request);
736 }
737 
738 //------------------------------------------------------------------------------
739 // Update Assembled Linear QFunction
740 //------------------------------------------------------------------------------
741 static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op,
742     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
743   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled,
744          &rstr, request);
745 }
746 
747 //------------------------------------------------------------------------------
748 // Operator Destroy
749 //------------------------------------------------------------------------------
750 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
751   int ierr;
752   CeedOperator_Blocked *impl;
753   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
754 
755   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
756     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
757     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
758   }
759   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
760   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
761   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
762   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
763 
764   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
765     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
766     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
767   }
768   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
769   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
770 
771   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
772     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
773     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
774   }
775   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
776   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
777 
778   ierr = CeedFree(&impl); CeedChkBackend(ierr);
779   return CEED_ERROR_SUCCESS;
780 }
781 
782 //------------------------------------------------------------------------------
783 // Operator Create
784 //------------------------------------------------------------------------------
785 int CeedOperatorCreate_Blocked(CeedOperator op) {
786   int ierr;
787   Ceed ceed;
788   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
789   CeedOperator_Blocked *impl;
790 
791   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
792   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
793 
794   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
795                                 CeedOperatorLinearAssembleQFunction_Blocked);
796   CeedChkBackend(ierr);
797   ierr = CeedSetBackendFunction(ceed, "Operator", op,
798                                 "LinearAssembleQFunctionUpdate",
799                                 CeedOperatorLinearAssembleQFunctionUpdate_Blocked);
800   CeedChkBackend(ierr);
801   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
802                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
803   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
804                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
805   return CEED_ERROR_SUCCESS;
806 }
807 //------------------------------------------------------------------------------
808