xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision d1d35e2f02dc969aee8debf3fd943dd784aa847a)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include "ceed-blocked.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28     CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr,
29     CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs,
30     CeedInt start_e, CeedInt numfields, CeedInt Q) {
31   CeedInt dim, ierr, num_comp, size, P;
32   Ceed ceed;
33   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
34   CeedBasis basis;
35   CeedElemRestriction r;
36   CeedOperatorField *op_fields;
37   CeedQFunctionField *qf_fields;
38   if (inOrOut) {
39     ierr = CeedOperatorGetFields(op, NULL, &op_fields);
40     CeedChkBackend(ierr);
41     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields);
42     CeedChkBackend(ierr);
43   } else {
44     ierr = CeedOperatorGetFields(op, &op_fields, NULL);
45     CeedChkBackend(ierr);
46     ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL);
47     CeedChkBackend(ierr);
48   }
49   const CeedInt blk_size = 8;
50 
51   // Loop over fields
52   for (CeedInt i=0; i<numfields; i++) {
53     CeedEvalMode eval_mode;
54     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
55     CeedChkBackend(ierr);
56 
57     if (eval_mode != CEED_EVAL_WEIGHT) {
58       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
59       CeedChkBackend(ierr);
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
61       CeedInt num_elem, elem_size, l_size, comp_stride;
62       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
63       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
64       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
66 
67       bool strided;
68       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
69       if (strided) {
70         CeedInt strides[3];
71         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
72         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
73                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
74         CeedChkBackend(ierr);
75       } else {
76         const CeedInt *offsets = NULL;
77         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
78         CeedChkBackend(ierr);
79         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
80         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
81                                                 blk_size, num_comp, comp_stride,
82                                                 l_size, CEED_MEM_HOST,
83                                                 CEED_COPY_VALUES, offsets,
84                                                 &blk_restr[i+start_e]);
85         CeedChkBackend(ierr);
86         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
87       }
88       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
89                                              &full_evecs[i+start_e]);
90       CeedChkBackend(ierr);
91     }
92 
93     switch(eval_mode) {
94     case CEED_EVAL_NONE:
95       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
96       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
97       CeedChkBackend(ierr);
98       break;
99     case CEED_EVAL_INTERP:
100       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
101       ierr = CeedElemRestrictionGetElementSize(r, &P);
102       CeedChkBackend(ierr);
103       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
104       CeedChkBackend(ierr);
105       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
106       CeedChkBackend(ierr);
107       break;
108     case CEED_EVAL_GRAD:
109       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
110       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
111       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
112       ierr = CeedElemRestrictionGetElementSize(r, &P);
113       CeedChkBackend(ierr);
114       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
115       CeedChkBackend(ierr);
116       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
117       CeedChkBackend(ierr);
118       break;
119     case CEED_EVAL_WEIGHT: // Only on input fields
120       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
121       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
122       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
123                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
124       CeedChkBackend(ierr);
125 
126       break;
127     case CEED_EVAL_DIV:
128       break; // Not implemented
129     case CEED_EVAL_CURL:
130       break; // Not implemented
131     }
132   }
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 //------------------------------------------------------------------------------
137 // Setup Operator
138 //------------------------------------------------------------------------------
139 static int CeedOperatorSetup_Blocked(CeedOperator op) {
140   int ierr;
141   bool setup_done;
142   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
143   if (setup_done) return CEED_ERROR_SUCCESS;
144   Ceed ceed;
145   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
146   CeedOperator_Blocked *impl;
147   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
148   CeedQFunction qf;
149   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
150   CeedInt Q, num_input_fields, num_output_fields;
151   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
152   ierr = CeedQFunctionIsIdentity(qf, &impl->identity_qf); CeedChkBackend(ierr);
153   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
154   CeedChkBackend(ierr);
155   CeedOperatorField *op_input_fields, *op_output_fields;
156   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
157   CeedChkBackend(ierr);
158   CeedQFunctionField *qf_input_fields, *qf_output_fields;
159   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
160   CeedChkBackend(ierr);
161 
162   // Allocate
163   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
164   CeedChkBackend(ierr);
165   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
166   CeedChkBackend(ierr);
167   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
168   CeedChkBackend(ierr);
169 
170   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
171   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
172   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
173   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
174   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
175 
176   impl->num_e_vecs_in = num_input_fields;
177   impl->num_e_vecs_out = num_output_fields;
178 
179   // Set up infield and outfield pointer arrays
180   // Infields
181   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr,
182                                          impl->e_vecs, impl->e_vecs_in,
183                                          impl->q_vecs_in, 0,
184                                          num_input_fields, Q);
185   CeedChkBackend(ierr);
186   // Outfields
187   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr,
188                                          impl->e_vecs, impl->e_vecs_out,
189                                          impl->q_vecs_out, num_input_fields,
190                                          num_output_fields, Q);
191   CeedChkBackend(ierr);
192 
193   // Identity QFunctions
194   if (impl->identity_qf) {
195     CeedEvalMode in_mode, out_mode;
196     CeedQFunctionField *in_fields, *out_fields;
197     ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields);
198     CeedChkBackend(ierr);
199 
200     for (CeedInt i=0; i<num_input_fields; i++) {
201       ierr = CeedQFunctionFieldGetEvalMode(in_fields[i], &in_mode);
202       CeedChkBackend(ierr);
203       ierr = CeedQFunctionFieldGetEvalMode(out_fields[i], &out_mode);
204       CeedChkBackend(ierr);
205 
206       ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
207       impl->q_vecs_out[i] = impl->q_vecs_in[i];
208       ierr = CeedVectorAddReference(impl->q_vecs_in[i]); CeedChkBackend(ierr);
209     }
210   }
211 
212   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
213 
214   return CEED_ERROR_SUCCESS;
215 }
216 
217 //------------------------------------------------------------------------------
218 // Setup Operator Inputs
219 //------------------------------------------------------------------------------
220 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
221     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
222     CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl,
223     CeedRequest *request) {
224   CeedInt ierr;
225   CeedEvalMode eval_mode;
226   CeedVector vec;
227   uint64_t state;
228 
229   for (CeedInt i=0; i<num_input_fields; i++) {
230     // Get input vector
231     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
232     CeedChkBackend(ierr);
233     if (vec == CEED_VECTOR_ACTIVE) {
234       if (skip_active)
235         continue;
236       else
237         vec = in_vec;
238     }
239 
240     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
241     CeedChkBackend(ierr);
242     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
243     } else {
244       // Restrict
245       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
246       if (state != impl->input_state[i] || vec == in_vec) {
247         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
248                                         vec, impl->e_vecs[i], request);
249         CeedChkBackend(ierr);
250         impl->input_state[i] = state;
251       }
252       // Get evec
253       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
254                                     (const CeedScalar **) &impl->e_data[i]);
255       CeedChkBackend(ierr);
256     }
257   }
258   return CEED_ERROR_SUCCESS;
259 }
260 
261 //------------------------------------------------------------------------------
262 // Input Basis Action
263 //------------------------------------------------------------------------------
264 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
265     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
266     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
267     CeedOperator_Blocked *impl) {
268   CeedInt ierr;
269   CeedInt dim, elem_size, size;
270   CeedElemRestriction elem_restr;
271   CeedEvalMode eval_mode;
272   CeedBasis basis;
273 
274   for (CeedInt i=0; i<num_input_fields; i++) {
275     // Skip active input
276     if (skip_active) {
277       CeedVector vec;
278       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
279       CeedChkBackend(ierr);
280       if (vec == CEED_VECTOR_ACTIVE)
281         continue;
282     }
283 
284     // Get elem_size, eval_mode, size
285     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
286     CeedChkBackend(ierr);
287     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
288     CeedChkBackend(ierr);
289     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
290     CeedChkBackend(ierr);
291     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
292     CeedChkBackend(ierr);
293     // Basis action
294     switch(eval_mode) {
295     case CEED_EVAL_NONE:
296       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
297                                 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
298       break;
299     case CEED_EVAL_INTERP:
300       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
301       CeedChkBackend(ierr);
302       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
303                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]);
304       CeedChkBackend(ierr);
305       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
306                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
307                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
308       break;
309     case CEED_EVAL_GRAD:
310       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
311       CeedChkBackend(ierr);
312       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
313       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
314                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]);
315       CeedChkBackend(ierr);
316       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
317                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
318                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
319       break;
320     case CEED_EVAL_WEIGHT:
321       break;  // No action
322     // LCOV_EXCL_START
323     case CEED_EVAL_DIV:
324     case CEED_EVAL_CURL: {
325       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
326       CeedChkBackend(ierr);
327       Ceed ceed;
328       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
329       return CeedError(ceed, CEED_ERROR_BACKEND,
330                        "Ceed evaluation mode not implemented");
331       // LCOV_EXCL_STOP
332     }
333     }
334   }
335   return CEED_ERROR_SUCCESS;
336 }
337 
338 //------------------------------------------------------------------------------
339 // Output Basis Action
340 //------------------------------------------------------------------------------
341 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
342     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
343     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
344     CeedOperator op, CeedOperator_Blocked *impl) {
345   CeedInt ierr;
346   CeedInt dim, elem_size, size;
347   CeedElemRestriction elem_restr;
348   CeedEvalMode eval_mode;
349   CeedBasis basis;
350 
351   for (CeedInt i=0; i<num_output_fields; i++) {
352     // Get elem_size, eval_mode, size
353     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
354     CeedChkBackend(ierr);
355     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
356     CeedChkBackend(ierr);
357     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
358     CeedChkBackend(ierr);
359     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
360     CeedChkBackend(ierr);
361     // Basis action
362     switch(eval_mode) {
363     case CEED_EVAL_NONE:
364       break; // No action
365     case CEED_EVAL_INTERP:
366       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
367       CeedChkBackend(ierr);
368       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
369                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]);
370       CeedChkBackend(ierr);
371       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
372                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
373                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
374       break;
375     case CEED_EVAL_GRAD:
376       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
377       CeedChkBackend(ierr);
378       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
379       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
380                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]);
381       CeedChkBackend(ierr);
382       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
383                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
384                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
385       break;
386     // LCOV_EXCL_START
387     case CEED_EVAL_WEIGHT: {
388       Ceed ceed;
389       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
390       return CeedError(ceed, CEED_ERROR_BACKEND,
391                        "CEED_EVAL_WEIGHT cannot be an output "
392                        "evaluation mode");
393     }
394     case CEED_EVAL_DIV:
395     case CEED_EVAL_CURL: {
396       Ceed ceed;
397       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
398       return CeedError(ceed, CEED_ERROR_BACKEND,
399                        "Ceed evaluation mode not implemented");
400       // LCOV_EXCL_STOP
401     }
402     }
403   }
404   return CEED_ERROR_SUCCESS;
405 }
406 
407 //------------------------------------------------------------------------------
408 // Restore Input Vectors
409 //------------------------------------------------------------------------------
410 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
411     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
412     bool skip_active, CeedOperator_Blocked *impl) {
413   CeedInt ierr;
414   CeedEvalMode eval_mode;
415 
416   for (CeedInt i=0; i<num_input_fields; i++) {
417     // Skip active inputs
418     if (skip_active) {
419       CeedVector vec;
420       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
421       CeedChkBackend(ierr);
422       if (vec == CEED_VECTOR_ACTIVE)
423         continue;
424     }
425     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
426     CeedChkBackend(ierr);
427     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
428     } else {
429       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
430                                         (const CeedScalar **) &impl->e_data[i]);
431       CeedChkBackend(ierr);
432     }
433   }
434   return CEED_ERROR_SUCCESS;
435 }
436 
437 //------------------------------------------------------------------------------
438 // Operator Apply
439 //------------------------------------------------------------------------------
440 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
441                                         CeedVector out_vec,
442                                         CeedRequest *request) {
443   int ierr;
444   CeedOperator_Blocked *impl;
445   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
446   const CeedInt blk_size = 8;
447   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
448   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
449   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
450   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
451   CeedQFunction qf;
452   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
453   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
454   CeedChkBackend(ierr);
455   CeedOperatorField *op_input_fields, *op_output_fields;
456   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
457   CeedChkBackend(ierr);
458   CeedQFunctionField *qf_input_fields, *qf_output_fields;
459   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
460   CeedChkBackend(ierr);
461   CeedEvalMode eval_mode;
462   CeedVector vec;
463 
464   // Setup
465   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
466 
467   // Input Evecs and Restriction
468   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
469                                          op_input_fields, in_vec, false, impl,
470                                          request); CeedChkBackend(ierr);
471 
472   // Output Evecs
473   for (CeedInt i=0; i<num_output_fields; i++) {
474     ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST,
475                               &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
476   }
477 
478   // Loop through elements
479   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
480     // Output pointers
481     for (CeedInt i=0; i<num_output_fields; i++) {
482       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
483       CeedChkBackend(ierr);
484       if (eval_mode == CEED_EVAL_NONE) {
485         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
486         CeedChkBackend(ierr);
487         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
488                                   CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]);
489         CeedChkBackend(ierr);
490       }
491     }
492 
493     // Input basis apply
494     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
495                                           num_input_fields, blk_size, false, impl);
496     CeedChkBackend(ierr);
497 
498     // Q function
499     if (!impl->identity_qf) {
500       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
501       CeedChkBackend(ierr);
502     }
503 
504     // Output basis apply
505     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
506                                            blk_size, num_input_fields,
507                                            num_output_fields, op, impl);
508     CeedChkBackend(ierr);
509   }
510 
511   // Output restriction
512   for (CeedInt i=0; i<num_output_fields; i++) {
513     // Restore evec
514     ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in],
515                                   &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
516     // Get output vector
517     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
518     CeedChkBackend(ierr);
519     // Active
520     if (vec == CEED_VECTOR_ACTIVE)
521       vec = out_vec;
522     // Restrict
523     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in],
524                                     CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in],
525                                     vec, request); CeedChkBackend(ierr);
526 
527   }
528 
529   // Restore input arrays
530   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
531          op_input_fields, false, impl); CeedChkBackend(ierr);
532 
533   return CEED_ERROR_SUCCESS;
534 }
535 
536 //------------------------------------------------------------------------------
537 // Assemble Linear QFunction
538 //------------------------------------------------------------------------------
539 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
540     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
541   int ierr;
542   CeedOperator_Blocked *impl;
543   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
544   const CeedInt blk_size = 8;
545   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
546   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
547   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
548   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
549   CeedQFunction qf;
550   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
551   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
552   CeedChkBackend(ierr);
553   CeedOperatorField *op_input_fields, *op_output_fields;
554   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
555   CeedChkBackend(ierr);
556   CeedQFunctionField *qf_input_fields, *qf_output_fields;
557   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
558   CeedChkBackend(ierr);
559   CeedVector vec, lvec;
560   CeedInt num_active_in = 0, num_active_out = 0;
561   CeedVector *active_in = NULL;
562   CeedScalar *a, *tmp;
563   Ceed ceed;
564   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
565 
566   // Setup
567   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
568 
569   // Check for identity
570   if (impl->identity_qf)
571     // LCOV_EXCL_START
572     return CeedError(ceed, CEED_ERROR_BACKEND,
573                      "Assembling identity QFunctions not supported");
574   // LCOV_EXCL_STOP
575 
576   // Input Evecs and Restriction
577   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
578                                          op_input_fields, NULL, true, impl,
579                                          request); CeedChkBackend(ierr);
580 
581   // Count number of active input fields
582   for (CeedInt i=0; i<num_input_fields; i++) {
583     // Get input vector
584     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
585     CeedChkBackend(ierr);
586     // Check if active input
587     if (vec == CEED_VECTOR_ACTIVE) {
588       ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
589       CeedChkBackend(ierr);
590       ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
591       ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
592       CeedChkBackend(ierr);
593       ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
594       for (CeedInt field=0; field<size; field++) {
595         ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
596         CeedChkBackend(ierr);
597         ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
598                                   CEED_USE_POINTER, &tmp[field*Q*blk_size]);
599         CeedChkBackend(ierr);
600       }
601       num_active_in += size;
602       ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
603     }
604   }
605 
606   // Count number of active output fields
607   for (CeedInt i=0; i<num_output_fields; i++) {
608     // Get output vector
609     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
610     CeedChkBackend(ierr);
611     // Check if active output
612     if (vec == CEED_VECTOR_ACTIVE) {
613       ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
614       CeedChkBackend(ierr);
615       num_active_out += size;
616     }
617   }
618 
619   // Check sizes
620   if (!num_active_in || !num_active_out)
621     // LCOV_EXCL_START
622     return CeedError(ceed, CEED_ERROR_BACKEND,
623                      "Cannot assemble QFunction without active inputs "
624                      "and outputs");
625   // LCOV_EXCL_STOP
626 
627   // Setup Lvec
628   ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
629                           &lvec); CeedChkBackend(ierr);
630   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
631 
632   // Create output restriction
633   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
634   ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
635                                           num_active_in*num_active_out,
636                                           num_active_in*num_active_out*num_elem*Q,
637                                           strides, rstr); CeedChkBackend(ierr);
638   // Create assembled vector
639   ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
640                           assembled); CeedChkBackend(ierr);
641 
642   // Loop through elements
643   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
644     // Input basis apply
645     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
646                                           num_input_fields, blk_size, true, impl);
647     CeedChkBackend(ierr);
648 
649     // Assemble QFunction
650     for (CeedInt in=0; in<num_active_in; in++) {
651       // Set Inputs
652       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
653       if (num_active_in > 1) {
654         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
655                                   0.0); CeedChkBackend(ierr);
656       }
657       // Set Outputs
658       for (CeedInt out=0; out<num_output_fields; out++) {
659         // Get output vector
660         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
661         CeedChkBackend(ierr);
662         // Check if active output
663         if (vec == CEED_VECTOR_ACTIVE) {
664           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
665                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
666           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
667           CeedChkBackend(ierr);
668           a += size*Q*blk_size; // Advance the pointer by the size of the output
669         }
670       }
671       // Apply QFunction
672       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
673       CeedChkBackend(ierr);
674     }
675   }
676 
677   // Un-set output Qvecs to prevent accidental overwrite of Assembled
678   for (CeedInt out=0; out<num_output_fields; out++) {
679     // Get output vector
680     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
681     CeedChkBackend(ierr);
682     // Check if active output
683     if (vec == CEED_VECTOR_ACTIVE) {
684       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
685                          NULL); CeedChkBackend(ierr);
686     }
687   }
688 
689   // Restore input arrays
690   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
691          op_input_fields, true, impl); CeedChkBackend(ierr);
692 
693   // Output blocked restriction
694   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
695   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
696   CeedElemRestriction blk_rstr;
697   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
698          num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
699          strides, &blk_rstr); CeedChkBackend(ierr);
700   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
701                                   request); CeedChkBackend(ierr);
702 
703   // Cleanup
704   for (CeedInt i=0; i<num_active_in; i++) {
705     ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr);
706   }
707   ierr = CeedFree(&active_in); CeedChkBackend(ierr);
708   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
709   ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr);
710 
711   return CEED_ERROR_SUCCESS;
712 }
713 
714 //------------------------------------------------------------------------------
715 // Operator Destroy
716 //------------------------------------------------------------------------------
717 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
718   int ierr;
719   CeedOperator_Blocked *impl;
720   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
721 
722   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
723     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
724     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
725   }
726   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
727   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
728   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
729   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
730 
731   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
732     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
733     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
734   }
735   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
736   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
737 
738   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
739     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
740     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
741   }
742   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
743   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
744 
745   ierr = CeedFree(&impl); CeedChkBackend(ierr);
746   return CEED_ERROR_SUCCESS;
747 }
748 
749 //------------------------------------------------------------------------------
750 // Operator Create
751 //------------------------------------------------------------------------------
752 int CeedOperatorCreate_Blocked(CeedOperator op) {
753   int ierr;
754   Ceed ceed;
755   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
756   CeedOperator_Blocked *impl;
757 
758   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
759   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
760 
761   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
762                                 CeedOperatorLinearAssembleQFunction_Blocked);
763   CeedChkBackend(ierr);
764   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
765                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
766   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
767                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
768   return CEED_ERROR_SUCCESS;
769 }
770 //------------------------------------------------------------------------------
771