xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 05a9c2bb2f62eff0bfbb15aec60b0312b25f01c2)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include "ceed-blocked.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28     CeedOperator op, bool inOrOut, CeedElemRestriction *blk_restr,
29     CeedVector *full_evecs, CeedVector *e_vecs, CeedVector *q_vecs,
30     CeedInt start_e, CeedInt numfields, CeedInt Q) {
31   CeedInt dim, ierr, num_comp, size, P;
32   Ceed ceed;
33   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
34   CeedBasis basis;
35   CeedElemRestriction r;
36   CeedOperatorField *op_fields;
37   CeedQFunctionField *qf_fields;
38   if (inOrOut) {
39     ierr = CeedOperatorGetFields(op, NULL, &op_fields);
40     CeedChkBackend(ierr);
41     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields);
42     CeedChkBackend(ierr);
43   } else {
44     ierr = CeedOperatorGetFields(op, &op_fields, NULL);
45     CeedChkBackend(ierr);
46     ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL);
47     CeedChkBackend(ierr);
48   }
49   const CeedInt blk_size = 8;
50 
51   // Loop over fields
52   for (CeedInt i=0; i<numfields; i++) {
53     CeedEvalMode eval_mode;
54     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
55     CeedChkBackend(ierr);
56 
57     if (eval_mode != CEED_EVAL_WEIGHT) {
58       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
59       CeedChkBackend(ierr);
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
61       CeedInt num_elem, elem_size, l_size, comp_stride;
62       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
63       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
64       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
66 
67       bool strided;
68       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
69       if (strided) {
70         CeedInt strides[3];
71         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
72         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
73                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
74         CeedChkBackend(ierr);
75       } else {
76         const CeedInt *offsets = NULL;
77         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
78         CeedChkBackend(ierr);
79         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
80         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
81                                                 blk_size, num_comp, comp_stride,
82                                                 l_size, CEED_MEM_HOST,
83                                                 CEED_COPY_VALUES, offsets,
84                                                 &blk_restr[i+start_e]);
85         CeedChkBackend(ierr);
86         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
87       }
88       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
89                                              &full_evecs[i+start_e]);
90       CeedChkBackend(ierr);
91     }
92 
93     switch(eval_mode) {
94     case CEED_EVAL_NONE:
95       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
96       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
97       CeedChkBackend(ierr);
98       break;
99     case CEED_EVAL_INTERP:
100       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
101       ierr = CeedElemRestrictionGetElementSize(r, &P);
102       CeedChkBackend(ierr);
103       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
104       CeedChkBackend(ierr);
105       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
106       CeedChkBackend(ierr);
107       break;
108     case CEED_EVAL_GRAD:
109       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
110       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
111       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
112       ierr = CeedElemRestrictionGetElementSize(r, &P);
113       CeedChkBackend(ierr);
114       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
115       CeedChkBackend(ierr);
116       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
117       CeedChkBackend(ierr);
118       break;
119     case CEED_EVAL_WEIGHT: // Only on input fields
120       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
121       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
122       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
123                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
124       CeedChkBackend(ierr);
125 
126       break;
127     case CEED_EVAL_DIV:
128       break; // Not implemented
129     case CEED_EVAL_CURL:
130       break; // Not implemented
131     }
132   }
133   return CEED_ERROR_SUCCESS;
134 }
135 
136 //------------------------------------------------------------------------------
137 // Setup Operator
138 //------------------------------------------------------------------------------
139 static int CeedOperatorSetup_Blocked(CeedOperator op) {
140   int ierr;
141   bool setup_done;
142   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
143   if (setup_done) return CEED_ERROR_SUCCESS;
144   Ceed ceed;
145   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
146   CeedOperator_Blocked *impl;
147   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
148   CeedQFunction qf;
149   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
150   CeedInt Q, num_input_fields, num_output_fields;
151   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
152   ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr);
153   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
154   CeedChkBackend(ierr);
155   CeedOperatorField *op_input_fields, *op_output_fields;
156   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
157   CeedChkBackend(ierr);
158   CeedQFunctionField *qf_input_fields, *qf_output_fields;
159   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
160   CeedChkBackend(ierr);
161 
162   // Allocate
163   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
164   CeedChkBackend(ierr);
165   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
166   CeedChkBackend(ierr);
167   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
168   CeedChkBackend(ierr);
169 
170   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
171   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
172   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
173   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
174   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
175 
176   impl->num_e_vecs_in = num_input_fields;
177   impl->num_e_vecs_out = num_output_fields;
178 
179   // Set up infield and outfield pointer arrays
180   // Infields
181   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blk_restr,
182                                          impl->e_vecs, impl->e_vecs_in,
183                                          impl->q_vecs_in, 0,
184                                          num_input_fields, Q);
185   CeedChkBackend(ierr);
186   // Outfields
187   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blk_restr,
188                                          impl->e_vecs, impl->e_vecs_out,
189                                          impl->q_vecs_out, num_input_fields,
190                                          num_output_fields, Q);
191   CeedChkBackend(ierr);
192 
193   // Identity QFunctions
194   if (impl->is_identity_qf) {
195     CeedEvalMode in_mode, out_mode;
196     CeedQFunctionField *in_fields, *out_fields;
197     ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields);
198     CeedChkBackend(ierr);
199     ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode);
200     CeedChkBackend(ierr);
201     ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode);
202     CeedChkBackend(ierr);
203 
204     if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) {
205       impl->is_identity_restr_op = true;
206     } else {
207       ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr);
208       impl->q_vecs_out[0] = impl->q_vecs_in[0];
209       ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr);
210     }
211   }
212 
213   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
214 
215   return CEED_ERROR_SUCCESS;
216 }
217 
218 //------------------------------------------------------------------------------
219 // Setup Operator Inputs
220 //------------------------------------------------------------------------------
221 static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields,
222     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
223     CeedVector in_vec, bool skip_active, CeedOperator_Blocked *impl,
224     CeedRequest *request) {
225   CeedInt ierr;
226   CeedEvalMode eval_mode;
227   CeedVector vec;
228   uint64_t state;
229 
230   for (CeedInt i=0; i<num_input_fields; i++) {
231     // Get input vector
232     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
233     CeedChkBackend(ierr);
234     if (vec == CEED_VECTOR_ACTIVE) {
235       if (skip_active)
236         continue;
237       else
238         vec = in_vec;
239     }
240 
241     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
242     CeedChkBackend(ierr);
243     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
244     } else {
245       // Restrict
246       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
247       if (state != impl->input_state[i] || vec == in_vec) {
248         ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
249                                         vec, impl->e_vecs[i], request);
250         CeedChkBackend(ierr);
251         impl->input_state[i] = state;
252       }
253       // Get evec
254       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
255                                     (const CeedScalar **) &impl->e_data[i]);
256       CeedChkBackend(ierr);
257     }
258   }
259   return CEED_ERROR_SUCCESS;
260 }
261 
262 //------------------------------------------------------------------------------
263 // Input Basis Action
264 //------------------------------------------------------------------------------
265 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
266     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
267     CeedInt num_input_fields, CeedInt blk_size, bool skip_active,
268     CeedOperator_Blocked *impl) {
269   CeedInt ierr;
270   CeedInt dim, elem_size, size;
271   CeedElemRestriction elem_restr;
272   CeedEvalMode eval_mode;
273   CeedBasis basis;
274 
275   for (CeedInt i=0; i<num_input_fields; i++) {
276     // Skip active input
277     if (skip_active) {
278       CeedVector vec;
279       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
280       CeedChkBackend(ierr);
281       if (vec == CEED_VECTOR_ACTIVE)
282         continue;
283     }
284 
285     // Get elem_size, eval_mode, size
286     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
287     CeedChkBackend(ierr);
288     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
289     CeedChkBackend(ierr);
290     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
291     CeedChkBackend(ierr);
292     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
293     CeedChkBackend(ierr);
294     // Basis action
295     switch(eval_mode) {
296     case CEED_EVAL_NONE:
297       ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
298                                 CEED_USE_POINTER, &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
299       break;
300     case CEED_EVAL_INTERP:
301       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
302       CeedChkBackend(ierr);
303       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
304                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size]);
305       CeedChkBackend(ierr);
306       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
307                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
308                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
309       break;
310     case CEED_EVAL_GRAD:
311       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
312       CeedChkBackend(ierr);
313       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
314       ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
315                                 CEED_USE_POINTER, &impl->e_data[i][e*elem_size*size/dim]);
316       CeedChkBackend(ierr);
317       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
318                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
319                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
320       break;
321     case CEED_EVAL_WEIGHT:
322       break;  // No action
323     // LCOV_EXCL_START
324     case CEED_EVAL_DIV:
325     case CEED_EVAL_CURL: {
326       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
327       CeedChkBackend(ierr);
328       Ceed ceed;
329       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
330       return CeedError(ceed, CEED_ERROR_BACKEND,
331                        "Ceed evaluation mode not implemented");
332       // LCOV_EXCL_STOP
333     }
334     }
335   }
336   return CEED_ERROR_SUCCESS;
337 }
338 
339 //------------------------------------------------------------------------------
340 // Output Basis Action
341 //------------------------------------------------------------------------------
342 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
343     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
344     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
345     CeedOperator op, CeedOperator_Blocked *impl) {
346   CeedInt ierr;
347   CeedInt dim, elem_size, size;
348   CeedElemRestriction elem_restr;
349   CeedEvalMode eval_mode;
350   CeedBasis basis;
351 
352   for (CeedInt i=0; i<num_output_fields; i++) {
353     // Get elem_size, eval_mode, size
354     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
355     CeedChkBackend(ierr);
356     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
357     CeedChkBackend(ierr);
358     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
359     CeedChkBackend(ierr);
360     ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
361     CeedChkBackend(ierr);
362     // Basis action
363     switch(eval_mode) {
364     case CEED_EVAL_NONE:
365       break; // No action
366     case CEED_EVAL_INTERP:
367       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
368       CeedChkBackend(ierr);
369       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
370                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size]);
371       CeedChkBackend(ierr);
372       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
373                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
374                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
375       break;
376     case CEED_EVAL_GRAD:
377       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
378       CeedChkBackend(ierr);
379       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
380       ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
381                                 CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*elem_size*size/dim]);
382       CeedChkBackend(ierr);
383       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
384                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
385                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
386       break;
387     // LCOV_EXCL_START
388     case CEED_EVAL_WEIGHT: {
389       Ceed ceed;
390       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
391       return CeedError(ceed, CEED_ERROR_BACKEND,
392                        "CEED_EVAL_WEIGHT cannot be an output "
393                        "evaluation mode");
394     }
395     case CEED_EVAL_DIV:
396     case CEED_EVAL_CURL: {
397       Ceed ceed;
398       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
399       return CeedError(ceed, CEED_ERROR_BACKEND,
400                        "Ceed evaluation mode not implemented");
401       // LCOV_EXCL_STOP
402     }
403     }
404   }
405   return CEED_ERROR_SUCCESS;
406 }
407 
408 //------------------------------------------------------------------------------
409 // Restore Input Vectors
410 //------------------------------------------------------------------------------
411 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields,
412     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
413     bool skip_active, CeedOperator_Blocked *impl) {
414   CeedInt ierr;
415   CeedEvalMode eval_mode;
416 
417   for (CeedInt i=0; i<num_input_fields; i++) {
418     // Skip active inputs
419     if (skip_active) {
420       CeedVector vec;
421       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
422       CeedChkBackend(ierr);
423       if (vec == CEED_VECTOR_ACTIVE)
424         continue;
425     }
426     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
427     CeedChkBackend(ierr);
428     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
429     } else {
430       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
431                                         (const CeedScalar **) &impl->e_data[i]);
432       CeedChkBackend(ierr);
433     }
434   }
435   return CEED_ERROR_SUCCESS;
436 }
437 
438 //------------------------------------------------------------------------------
439 // Operator Apply
440 //------------------------------------------------------------------------------
441 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec,
442                                         CeedVector out_vec,
443                                         CeedRequest *request) {
444   int ierr;
445   CeedOperator_Blocked *impl;
446   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
447   const CeedInt blk_size = 8;
448   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
449   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
450   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
451   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
452   CeedQFunction qf;
453   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
454   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
455   CeedChkBackend(ierr);
456   CeedOperatorField *op_input_fields, *op_output_fields;
457   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
458   CeedChkBackend(ierr);
459   CeedQFunctionField *qf_input_fields, *qf_output_fields;
460   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
461   CeedChkBackend(ierr);
462   CeedEvalMode eval_mode;
463   CeedVector vec;
464 
465   // Setup
466   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
467 
468   // Restriction only operator
469   if (impl->is_identity_restr_op) {
470     ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec,
471                                     impl->e_vecs[0], request); CeedChkBackend(ierr);
472     ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE,
473                                     impl->e_vecs[0], out_vec, request); CeedChkBackend(ierr);
474     return CEED_ERROR_SUCCESS;
475   }
476 
477   // Input Evecs and Restriction
478   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
479                                          op_input_fields, in_vec, false, impl,
480                                          request); CeedChkBackend(ierr);
481 
482   // Output Evecs
483   for (CeedInt i=0; i<num_output_fields; i++) {
484     ierr = CeedVectorGetArray(impl->e_vecs[i+impl->num_e_vecs_in], CEED_MEM_HOST,
485                               &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
486   }
487 
488   // Loop through elements
489   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
490     // Output pointers
491     for (CeedInt i=0; i<num_output_fields; i++) {
492       ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
493       CeedChkBackend(ierr);
494       if (eval_mode == CEED_EVAL_NONE) {
495         ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
496         CeedChkBackend(ierr);
497         ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
498                                   CEED_USE_POINTER, &impl->e_data[i + num_input_fields][e*Q*size]);
499         CeedChkBackend(ierr);
500       }
501     }
502 
503     // Input basis apply
504     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
505                                           num_input_fields, blk_size, false, impl);
506     CeedChkBackend(ierr);
507 
508     // Q function
509     if (!impl->is_identity_qf) {
510       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
511       CeedChkBackend(ierr);
512     }
513 
514     // Output basis apply
515     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields,
516                                            blk_size, num_input_fields,
517                                            num_output_fields, op, impl);
518     CeedChkBackend(ierr);
519   }
520 
521   // Output restriction
522   for (CeedInt i=0; i<num_output_fields; i++) {
523     // Restore evec
524     ierr = CeedVectorRestoreArray(impl->e_vecs[i+impl->num_e_vecs_in],
525                                   &impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
526     // Get output vector
527     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
528     CeedChkBackend(ierr);
529     // Active
530     if (vec == CEED_VECTOR_ACTIVE)
531       vec = out_vec;
532     // Restrict
533     ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_e_vecs_in],
534                                     CEED_TRANSPOSE, impl->e_vecs[i+impl->num_e_vecs_in],
535                                     vec, request); CeedChkBackend(ierr);
536 
537   }
538 
539   // Restore input arrays
540   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
541          op_input_fields, false, impl); CeedChkBackend(ierr);
542 
543   return CEED_ERROR_SUCCESS;
544 }
545 
546 //------------------------------------------------------------------------------
547 // Assemble Linear QFunction
548 //------------------------------------------------------------------------------
549 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
550     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
551   int ierr;
552   CeedOperator_Blocked *impl;
553   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
554   const CeedInt blk_size = 8;
555   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
556   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
557   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
558   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
559   CeedQFunction qf;
560   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
561   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
562   CeedChkBackend(ierr);
563   CeedOperatorField *op_input_fields, *op_output_fields;
564   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
565   CeedChkBackend(ierr);
566   CeedQFunctionField *qf_input_fields, *qf_output_fields;
567   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
568   CeedChkBackend(ierr);
569   CeedVector vec, lvec;
570   CeedInt num_active_in = 0, num_active_out = 0;
571   CeedVector *active_in = NULL;
572   CeedScalar *a, *tmp;
573   Ceed ceed;
574   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
575 
576   // Setup
577   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
578 
579   // Check for identity
580   if (impl->is_identity_qf)
581     // LCOV_EXCL_START
582     return CeedError(ceed, CEED_ERROR_BACKEND,
583                      "Assembling identity QFunctions not supported");
584   // LCOV_EXCL_STOP
585 
586   // Input Evecs and Restriction
587   ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields,
588                                          op_input_fields, NULL, true, impl,
589                                          request); CeedChkBackend(ierr);
590 
591   // Count number of active input fields
592   for (CeedInt i=0; i<num_input_fields; i++) {
593     // Get input vector
594     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
595     CeedChkBackend(ierr);
596     // Check if active input
597     if (vec == CEED_VECTOR_ACTIVE) {
598       ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
599       CeedChkBackend(ierr);
600       ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
601       ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
602       CeedChkBackend(ierr);
603       ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
604       for (CeedInt field=0; field<size; field++) {
605         ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
606         CeedChkBackend(ierr);
607         ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
608                                   CEED_USE_POINTER, &tmp[field*Q*blk_size]);
609         CeedChkBackend(ierr);
610       }
611       num_active_in += size;
612       ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
613     }
614   }
615 
616   // Count number of active output fields
617   for (CeedInt i=0; i<num_output_fields; i++) {
618     // Get output vector
619     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
620     CeedChkBackend(ierr);
621     // Check if active output
622     if (vec == CEED_VECTOR_ACTIVE) {
623       ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
624       CeedChkBackend(ierr);
625       num_active_out += size;
626     }
627   }
628 
629   // Check sizes
630   if (!num_active_in || !num_active_out)
631     // LCOV_EXCL_START
632     return CeedError(ceed, CEED_ERROR_BACKEND,
633                      "Cannot assemble QFunction without active inputs "
634                      "and outputs");
635   // LCOV_EXCL_STOP
636 
637   // Setup Lvec
638   ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
639                           &lvec); CeedChkBackend(ierr);
640   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
641 
642   // Create output restriction
643   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
644   ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
645                                           num_active_in*num_active_out,
646                                           num_active_in*num_active_out*num_elem*Q,
647                                           strides, rstr); CeedChkBackend(ierr);
648   // Create assembled vector
649   ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
650                           assembled); CeedChkBackend(ierr);
651 
652   // Loop through elements
653   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
654     // Input basis apply
655     ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields,
656                                           num_input_fields, blk_size, true, impl);
657     CeedChkBackend(ierr);
658 
659     // Assemble QFunction
660     for (CeedInt in=0; in<num_active_in; in++) {
661       // Set Inputs
662       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
663       if (num_active_in > 1) {
664         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
665                                   0.0); CeedChkBackend(ierr);
666       }
667       // Set Outputs
668       for (CeedInt out=0; out<num_output_fields; out++) {
669         // Get output vector
670         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
671         CeedChkBackend(ierr);
672         // Check if active output
673         if (vec == CEED_VECTOR_ACTIVE) {
674           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
675                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
676           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
677           CeedChkBackend(ierr);
678           a += size*Q*blk_size; // Advance the pointer by the size of the output
679         }
680       }
681       // Apply QFunction
682       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
683       CeedChkBackend(ierr);
684     }
685   }
686 
687   // Un-set output Qvecs to prevent accidental overwrite of Assembled
688   for (CeedInt out=0; out<num_output_fields; out++) {
689     // Get output vector
690     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
691     CeedChkBackend(ierr);
692     // Check if active output
693     if (vec == CEED_VECTOR_ACTIVE) {
694       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
695                          NULL); CeedChkBackend(ierr);
696     }
697   }
698 
699   // Restore input arrays
700   ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields,
701          op_input_fields, true, impl); CeedChkBackend(ierr);
702 
703   // Output blocked restriction
704   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
705   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
706   CeedElemRestriction blk_rstr;
707   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
708          num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
709          strides, &blk_rstr); CeedChkBackend(ierr);
710   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
711                                   request); CeedChkBackend(ierr);
712 
713   // Cleanup
714   for (CeedInt i=0; i<num_active_in; i++) {
715     ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr);
716   }
717   ierr = CeedFree(&active_in); CeedChkBackend(ierr);
718   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
719   ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr);
720 
721   return CEED_ERROR_SUCCESS;
722 }
723 
724 //------------------------------------------------------------------------------
725 // Operator Destroy
726 //------------------------------------------------------------------------------
727 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
728   int ierr;
729   CeedOperator_Blocked *impl;
730   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
731 
732   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
733     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
734     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
735   }
736   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
737   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
738   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
739   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
740 
741   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
742     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
743     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
744   }
745   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
746   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
747 
748   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
749     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
750     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
751   }
752   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
753   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
754 
755   ierr = CeedFree(&impl); CeedChkBackend(ierr);
756   return CEED_ERROR_SUCCESS;
757 }
758 
759 //------------------------------------------------------------------------------
760 // Operator Create
761 //------------------------------------------------------------------------------
762 int CeedOperatorCreate_Blocked(CeedOperator op) {
763   int ierr;
764   Ceed ceed;
765   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
766   CeedOperator_Blocked *impl;
767 
768   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
769   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
770 
771   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
772                                 CeedOperatorLinearAssembleQFunction_Blocked);
773   CeedChkBackend(ierr);
774   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
775                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
776   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
777                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
778   return CEED_ERROR_SUCCESS;
779 }
780 //------------------------------------------------------------------------------
781