xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision b425b72ce6207063c385cb41f0ea18e8c839ca9f)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include "ceed-blocked.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28     CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr,
29     CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs,
30     CeedInt start_e, CeedInt numfields, CeedInt Q) {
31   CeedInt dim, ierr, num_comp, size, P;
32   Ceed ceed;
33   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
34   CeedBasis basis;
35   CeedElemRestriction r;
36   CeedOperatorField *op_fields;
37   CeedQFunctionField *qf_fields;
38   if (inOrOut) {
39     ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields);
40     CeedChkBackend(ierr);
41     ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields);
42     CeedChkBackend(ierr);
43   } else {
44     ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL);
45     CeedChkBackend(ierr);
46     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL);
47     CeedChkBackend(ierr);
48   }
49   const CeedInt blk_size = 8;
50 
51   // Loop over fields
52   for (CeedInt i=0; i<numfields; i++) {
53     CeedEvalMode eval_mode;
54     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
55     CeedChkBackend(ierr);
56 
57     if (eval_mode != CEED_EVAL_WEIGHT) {
58       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
59       CeedChkBackend(ierr);
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
61       CeedInt num_elem, elem_size, l_size, comp_stride;
62       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
63       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
64       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
66 
67       bool strided;
68       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
69       if (strided) {
70         CeedInt strides[3];
71         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
72         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
73                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
74         CeedChkBackend(ierr);
75       } else {
76         const CeedInt *offsets = NULL;
77         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
78         CeedChkBackend(ierr);
79         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
80         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
81                                                 blk_size, num_comp, comp_stride,
82                                                 l_size, CEED_MEM_HOST,
83                                                 CEED_COPY_VALUES, offsets,
84                                                 &blk_restr[i+start_e]);
85         CeedChkBackend(ierr);
86         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
87       }
88       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
89                                              &full_evecs[i+start_e]);
90       CeedChkBackend(ierr);
91     }
92 
93     switch(eval_mode) {
94     case CEED_EVAL_NONE:
95       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
96       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
97       CeedChkBackend(ierr);
98       break;
99     case CEED_EVAL_INTERP:
100       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
101       ierr = CeedElemRestrictionGetElementSize(r, &P);
102       CeedChkBackend(ierr);
103       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
104       CeedChkBackend(ierr);
105       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
106       CeedChkBackend(ierr);
107       break;
108     case CEED_EVAL_GRAD:
109       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
110       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
111       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
112       ierr = CeedElemRestrictionGetElementSize(r, &P);
113       CeedChkBackend(ierr);
114       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
115       CeedChkBackend(ierr);
116       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
117       CeedChkBackend(ierr);
118       break;
119     case CEED_EVAL_WEIGHT: // Only on input fields
120       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
121       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
122       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
123                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
124       CeedChkBackend(ierr);
125 
126       break;
127     case CEED_EVAL_DIV:
128       break; // Not implemented
129     case CEED_EVAL_CURL:
130       break; // Not implemented
131     }
132   }
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 //------------------------------------------------------------------------------
137 // Setup Operator
138 //------------------------------------------------------------------------------
139 static int CeedOperatorSetup_Blocked(CeedOperator op) {
140   int ierr;
141   bool setup_done;
142   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
143   if (setup_done) return CEED_ERROR_SUCCESS;
144   Ceed ceed;
145   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
146   CeedOperator_Blocked *impl;
147   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
148   CeedQFunction qf;
149   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
150   CeedInt Q, num_input_fields, num_output_fields;
151   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
152   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
153   CeedOperatorField *op_input_fields, *op_output_fields;
154   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
155                                &num_output_fields, &op_output_fields);
156   CeedChkBackend(ierr);
157   CeedQFunctionField *qf_input_fields, *qf_output_fields;
158   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
159                                 &qf_output_fields);
160   CeedChkBackend(ierr);
161 
162   // Allocate
163   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
164   CeedChkBackend(ierr);
165   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
166   CeedChkBackend(ierr);
167   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
168   CeedChkBackend(ierr);
169 
170   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
171   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
172   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
173   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
174   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
175 
176   impl->num_e_vecs_in = num_input_fields;
177   impl->num_e_vecs_out = num_output_fields;
178 
179   // Set up infield and outfield pointer arrays
180   // Infields
181   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr,
182                                          impl->e_vecs, impl->e_vecs_in,
183                                          impl->q_vecs_in, 0,
184                                          num_input_fields, Q);
185   CeedChkBackend(ierr);
186   // Outfields
187   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr,
188                                          impl->e_vecs, impl->e_vecs_out,
189                                          impl->q_vecs_out, num_input_fields,
190                                          num_output_fields, Q);
191   CeedChkBackend(ierr);
192 
193   // Identity QFunctions
194   if (impl->is_identity_qf) {
195     CeedEvalMode in_mode, out_mode;
196     CeedQFunctionField *in_fields, *out_fields;
197     ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields);
198     CeedChkBackend(ierr);
199     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
200     CeedChkBackend(ierr);
201     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
202     CeedChkBackend(ierr);
203 
204     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
205       impl->is_identity_restr_op = true;
206     } else {
207       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
208       impl->q_vecs_out[0] = impl->q_vecs_in[0];
209       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
210     }
211   }
212 
213   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
214 
215   return CEED_ERROR_SUCCESS;
216 }
217 
218 //------------------------------------------------------------------------------
219 // Setup Operator Inputs
220 //------------------------------------------------------------------------------
221 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
222     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
223     CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl,
224     CeedRequest *request) {
225   CeedInt ierr;
226   CeedEvalMode eval_mode;
227   CeedVector vec;
228   uint64_t state;
229 
230   for (CeedInt i=0; i<num_input_fields; i++) {
231     // Get input vector
232     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
233     CeedChkBackend(ierr);
234     if (vec == CEED_VECTOR_ACTIVE) {
235       if (skip_active)
236         continue;
237       else
238         vec = in_vec;
239     }
240 
241     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
242     CeedChkBackend(ierr);
243     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
244     } else {
245       // Restrict
246       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
247       if (state != impl->input_state[i] || vec == in_vec) {
248         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
249                                         vec, impl->e_vecs[i], request);
250         CeedChkBackend(ierr);
251         impl->input_state[i] = state;
252       }
253       // Get evec
254       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
255                                     (const CeedScalar **) &impl->e_data[i]);
256       CeedChkBackend(ierr);
257     }
258   }
259   return CEED_ERROR_SUCCESS;
260 }
261 
262 //------------------------------------------------------------------------------
263 // Input Basis Action
264 //------------------------------------------------------------------------------
265 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
266     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
267     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
268     CeedOperator_Blocked *impl) {
269   CeedInt ierr;
270   CeedInt dim, elem_size, size;
271   CeedElemRestriction elem_restr;
272   CeedEvalMode eval_mode;
273   CeedBasis basis;
274 
275   for (CeedInt i=0; i<num_input_fields; i++) {
276     // Skip active input
277     if (skip_active) {
278       CeedVector vec;
279       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
280       CeedChkBackend(ierr);
281       if (vec == CEED_VECTOR_ACTIVE)
282         continue;
283     }
284 
285     // Get elem_size, eval_mode, size
286     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
287     CeedChkBackend(ierr);
288     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
289     CeedChkBackend(ierr);
290     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
291     CeedChkBackend(ierr);
292     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
293     CeedChkBackend(ierr);
294     // Basis action
295     switch(eval_mode) {
296     case CEED_EVAL_NONE:
297       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
298                                 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
299       break;
300     case CEED_EVAL_INTERP:
301       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
302       CeedChkBackend(ierr);
303       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
304                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]);
305       CeedChkBackend(ierr);
306       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
307                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
308                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
309       break;
310     case CEED_EVAL_GRAD:
311       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
312       CeedChkBackend(ierr);
313       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
314       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
315                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]);
316       CeedChkBackend(ierr);
317       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
318                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
319                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
320       break;
321     case CEED_EVAL_WEIGHT:
322       break;  // No action
323     // LCOV_EXCL_START
324     case CEED_EVAL_DIV:
325     case CEED_EVAL_CURL: {
326       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
327       CeedChkBackend(ierr);
328       Ceed ceed;
329       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
330       return CeedError(ceed, CEED_ERROR_BACKEND,
331                        "Ceed evaluation mode not implemented");
332       // LCOV_EXCL_STOP
333     }
334     }
335   }
336   return CEED_ERROR_SUCCESS;
337 }
338 
339 //------------------------------------------------------------------------------
340 // Output Basis Action
341 //------------------------------------------------------------------------------
342 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
343     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
344     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
345     CeedOperator op, CeedOperator_Blocked *impl) {
346   CeedInt ierr;
347   CeedInt dim, elem_size, size;
348   CeedElemRestriction elem_restr;
349   CeedEvalMode eval_mode;
350   CeedBasis basis;
351 
352   for (CeedInt i=0; i<num_output_fields; i++) {
353     // Get elem_size, eval_mode, size
354     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
355     CeedChkBackend(ierr);
356     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
357     CeedChkBackend(ierr);
358     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
359     CeedChkBackend(ierr);
360     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
361     CeedChkBackend(ierr);
362     // Basis action
363     switch(eval_mode) {
364     case CEED_EVAL_NONE:
365       break; // No action
366     case CEED_EVAL_INTERP:
367       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
368       CeedChkBackend(ierr);
369       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
370                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]);
371       CeedChkBackend(ierr);
372       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
373                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
374                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
375       break;
376     case CEED_EVAL_GRAD:
377       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
378       CeedChkBackend(ierr);
379       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
380       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
381                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]);
382       CeedChkBackend(ierr);
383       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
384                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
385                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
386       break;
387     // LCOV_EXCL_START
388     case CEED_EVAL_WEIGHT: {
389       Ceed ceed;
390       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
391       return CeedError(ceed, CEED_ERROR_BACKEND,
392                        "CEED_EVAL_WEIGHT cannot be an output "
393                        "evaluation mode");
394     }
395     case CEED_EVAL_DIV:
396     case CEED_EVAL_CURL: {
397       Ceed ceed;
398       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
399       return CeedError(ceed, CEED_ERROR_BACKEND,
400                        "Ceed evaluation mode not implemented");
401       // LCOV_EXCL_STOP
402     }
403     }
404   }
405   return CEED_ERROR_SUCCESS;
406 }
407 
408 //------------------------------------------------------------------------------
409 // Restore Input Vectors
410 //------------------------------------------------------------------------------
411 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
412     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
413     bool skip_active, CeedOperator_Blocked *impl) {
414   CeedInt ierr;
415   CeedEvalMode eval_mode;
416 
417   for (CeedInt i=0; i<num_input_fields; i++) {
418     // Skip active inputs
419     if (skip_active) {
420       CeedVector vec;
421       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
422       CeedChkBackend(ierr);
423       if (vec == CEED_VECTOR_ACTIVE)
424         continue;
425     }
426     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
427     CeedChkBackend(ierr);
428     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
429     } else {
430       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
431                                         (const CeedScalar **) &impl->e_data[i]);
432       CeedChkBackend(ierr);
433     }
434   }
435   return CEED_ERROR_SUCCESS;
436 }
437 
438 //------------------------------------------------------------------------------
439 // Operator Apply
440 //------------------------------------------------------------------------------
441 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
442                                         CeedVector out_vec,
443                                         CeedRequest *request) {
444   int ierr;
445   CeedOperator_Blocked *impl;
446   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
447   const CeedInt blk_size = 8;
448   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
449   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
450   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
451   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
452   CeedQFunction qf;
453   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
454   CeedOperatorField *op_input_fields, *op_output_fields;
455   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
456                                &num_output_fields, &op_output_fields);
457   CeedChkBackend(ierr);
458   CeedQFunctionField *qf_input_fields, *qf_output_fields;
459   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
460                                 &qf_output_fields);
461   CeedChkBackend(ierr);
462   CeedEvalMode eval_mode;
463   CeedVector vec;
464 
465   // Setup
466   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
467 
468   // Restriction only operator
469   if (impl->is_identity_restr_op) {
470     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
471                                     impl->e_vecs[0], request); CeedChkBackend(ierr);
472     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
473                                     impl->e_vecs[0], out_vec, request); CeedChkBackend(ierr);
474     return CEED_ERROR_SUCCESS;
475   }
476 
477   // Input Evecs and Restriction
478   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
479                                          op_input_fields, in_vec, false, impl,
480                                          request); CeedChkBackend(ierr);
481 
482   // Output Evecs
483   for (CeedInt i=0; i<num_output_fields; i++) {
484     ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST,
485                               &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
486   }
487 
488   // Loop through elements
489   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
490     // Output pointers
491     for (CeedInt i=0; i<num_output_fields; i++) {
492       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
493       CeedChkBackend(ierr);
494       if (eval_mode == CEED_EVAL_NONE) {
495         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
496         CeedChkBackend(ierr);
497         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
498                                   CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]);
499         CeedChkBackend(ierr);
500       }
501     }
502 
503     // Input basis apply
504     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
505                                           num_input_fields, blk_size, false, impl);
506     CeedChkBackend(ierr);
507 
508     // Q function
509     if (!impl->is_identity_qf) {
510       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
511       CeedChkBackend(ierr);
512     }
513 
514     // Output basis apply
515     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
516                                            blk_size, num_input_fields,
517                                            num_output_fields, op, impl);
518     CeedChkBackend(ierr);
519   }
520 
521   // Output restriction
522   for (CeedInt i=0; i<num_output_fields; i++) {
523     // Restore evec
524     ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in],
525                                   &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
526     // Get output vector
527     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
528     CeedChkBackend(ierr);
529     // Active
530     if (vec == CEED_VECTOR_ACTIVE)
531       vec = out_vec;
532     // Restrict
533     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in],
534                                     CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in],
535                                     vec, request); CeedChkBackend(ierr);
536 
537   }
538 
539   // Restore input arrays
540   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
541          op_input_fields, false, impl); CeedChkBackend(ierr);
542 
543   return CEED_ERROR_SUCCESS;
544 }
545 
546 //------------------------------------------------------------------------------
547 // Core code for assembling linear QFunction
548 //------------------------------------------------------------------------------
549 static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked(
550   CeedOperator op,
551   bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
552   CeedRequest *request) {
553   int ierr;
554   CeedOperator_Blocked *impl;
555   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
556   const CeedInt blk_size = 8;
557   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
558   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
559   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
560   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
561   CeedQFunction qf;
562   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
563   CeedOperatorField *op_input_fields, *op_output_fields;
564   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields,
565                                &num_output_fields, &op_output_fields);
566   CeedChkBackend(ierr);
567   CeedQFunctionField *qf_input_fields, *qf_output_fields;
568   ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL,
569                                 &qf_output_fields);
570   CeedChkBackend(ierr);
571   CeedVector vec, lvec = impl->qf_lvec;
572   CeedInt num_active_in = impl->qf_num_active_in,
573           num_active_out = impl->qf_num_active_out;
574   CeedVector *active_in = impl->qf_active_in;
575   CeedScalar *a, *tmp;
576   Ceed ceed;
577   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
578 
579   // Setup
580   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
581 
582   // Check for identity
583   if (impl->is_identity_qf)
584     // LCOV_EXCL_START
585     return CeedError(ceed, CEED_ERROR_BACKEND,
586                      "Assembling identity QFunctions not supported");
587   // LCOV_EXCL_STOP
588 
589   // Input Evecs and Restriction
590   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
591                                          op_input_fields, NULL, true, impl,
592                                          request); CeedChkBackend(ierr);
593 
594   // Count number of active input fields
595   if (!num_active_in) {
596     for (CeedInt i=0; i<num_input_fields; i++) {
597       // Get input vector
598       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
599       CeedChkBackend(ierr);
600       // Check if active input
601       if (vec == CEED_VECTOR_ACTIVE) {
602         ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
603         CeedChkBackend(ierr);
604         ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
605         ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
606         CeedChkBackend(ierr);
607         ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
608         for (CeedInt field=0; field<size; field++) {
609           ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
610           CeedChkBackend(ierr);
611           ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
612                                     CEED_USE_POINTER, &tmp[field*Q*blk_size]);
613           CeedChkBackend(ierr);
614         }
615         num_active_in += size;
616         ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
617       }
618     }
619     impl->qf_num_active_in = num_active_in;
620     impl->qf_active_in = active_in;
621   }
622 
623   // Count number of active output fields
624   if (!num_active_out) {
625     for (CeedInt i=0; i<num_output_fields; i++) {
626       // Get output vector
627       ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
628       CeedChkBackend(ierr);
629       // Check if active output
630       if (vec == CEED_VECTOR_ACTIVE) {
631         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
632         CeedChkBackend(ierr);
633         num_active_out += size;
634       }
635     }
636     impl->qf_num_active_out = num_active_out;
637   }
638 
639   // Check sizes
640   if (!num_active_in || !num_active_out)
641     // LCOV_EXCL_START
642     return CeedError(ceed, CEED_ERROR_BACKEND,
643                      "Cannot assemble QFunction without active inputs "
644                      "and outputs");
645   // LCOV_EXCL_STOP
646 
647   // Setup Lvec
648   if (!lvec) {
649     ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
650                             &lvec); CeedChkBackend(ierr);
651     impl->qf_lvec = lvec;
652   }
653   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
654 
655   // Build objects if needed
656   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
657   if (build_objects) {
658     // Create output restriction
659     ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
660                                             num_active_in*num_active_out,
661                                             num_active_in*num_active_out*num_elem*Q,
662                                             strides, rstr); CeedChkBackend(ierr);
663     // Create assembled vector
664     ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
665                             assembled); CeedChkBackend(ierr);
666   }
667 
668   // Loop through elements
669   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
670     // Input basis apply
671     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
672                                           num_input_fields, blk_size, true, impl);
673     CeedChkBackend(ierr);
674 
675     // Assemble QFunction
676     for (CeedInt in=0; in<num_active_in; in++) {
677       // Set Inputs
678       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
679       if (num_active_in > 1) {
680         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
681                                   0.0); CeedChkBackend(ierr);
682       }
683       // Set Outputs
684       for (CeedInt out=0; out<num_output_fields; out++) {
685         // Get output vector
686         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
687         CeedChkBackend(ierr);
688         // Check if active output
689         if (vec == CEED_VECTOR_ACTIVE) {
690           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
691                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
692           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
693           CeedChkBackend(ierr);
694           a += size*Q*blk_size; // Advance the pointer by the size of the output
695         }
696       }
697       // Apply QFunction
698       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
699       CeedChkBackend(ierr);
700     }
701   }
702 
703   // Un-set output Qvecs to prevent accidental overwrite of Assembled
704   for (CeedInt out=0; out<num_output_fields; out++) {
705     // Get output vector
706     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
707     CeedChkBackend(ierr);
708     // Check if active output
709     if (vec == CEED_VECTOR_ACTIVE) {
710       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
711                          NULL); CeedChkBackend(ierr);
712     }
713   }
714 
715   // Restore input arrays
716   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
717          op_input_fields, true, impl); CeedChkBackend(ierr);
718 
719   // Output blocked restriction
720   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
721   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
722   CeedElemRestriction blk_rstr = impl->qf_blk_rstr;
723   if (!impl->qf_blk_rstr) {
724     ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
725            num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
726            strides, &blk_rstr); CeedChkBackend(ierr);
727     impl->qf_blk_rstr = blk_rstr;
728   }
729   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
730                                   request); CeedChkBackend(ierr);
731 
732   return CEED_ERROR_SUCCESS;
733 }
734 
735 //------------------------------------------------------------------------------
736 // Assemble Linear QFunction
737 //------------------------------------------------------------------------------
738 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
739     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
740   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled,
741          rstr, request);
742 }
743 
744 //------------------------------------------------------------------------------
745 // Update Assembled Linear QFunction
746 //------------------------------------------------------------------------------
747 static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op,
748     CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
749   return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled,
750          &rstr, request);
751 }
752 
753 //------------------------------------------------------------------------------
754 // Operator Destroy
755 //------------------------------------------------------------------------------
756 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
757   int ierr;
758   CeedOperator_Blocked *impl;
759   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
760 
761   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
762     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
763     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
764   }
765   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
766   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
767   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
768   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
769 
770   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
771     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
772     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
773   }
774   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
775   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
776 
777   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
778     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
779     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
780   }
781   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
782   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
783 
784   // QFunction assembly data
785   for (CeedInt i=0; i<impl->qf_num_active_in; i++) {
786     ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr);
787   }
788   ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr);
789   ierr = CeedVectorDestroy(&impl->qf_lvec); CeedChkBackend(ierr);
790   ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr);
791 
792   ierr = CeedFree(&impl); CeedChkBackend(ierr);
793   return CEED_ERROR_SUCCESS;
794 }
795 
796 //------------------------------------------------------------------------------
797 // Operator Create
798 //------------------------------------------------------------------------------
799 int CeedOperatorCreate_Blocked(CeedOperator op) {
800   int ierr;
801   Ceed ceed;
802   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
803   CeedOperator_Blocked *impl;
804 
805   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
806   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
807 
808   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
809                                 CeedOperatorLinearAssembleQFunction_Blocked);
810   CeedChkBackend(ierr);
811   ierr = CeedSetBackendFunction(ceed, "Operator", op,
812                                 "LinearAssembleQFunctionUpdate",
813                                 CeedOperatorLinearAssembleQFunctionUpdate_Blocked);
814   CeedChkBackend(ierr);
815   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
816                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
817   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
818                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
819   return CEED_ERROR_SUCCESS;
820 }
821 //------------------------------------------------------------------------------
822