1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 34a2e7687Sjeremylt // 4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 54a2e7687Sjeremylt // 6*3d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 74a2e7687Sjeremylt 8ec3da8bcSJed Brown #include <ceed/ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 113d576824SJeremy L Thompson #include <stddef.h> 123d576824SJeremy L Thompson #include <stdint.h> 134a2e7687Sjeremylt #include "ceed-blocked.h" 144a2e7687Sjeremylt 15f10650afSjeremylt //------------------------------------------------------------------------------ 16f10650afSjeremylt // Setup Input/Output Fields 17f10650afSjeremylt //------------------------------------------------------------------------------ 1889c6efa4Sjeremylt static int CeedOperatorSetupFields_Blocked(CeedQFunction qf, 194fc1f125SJeremy L Thompson CeedOperator op, bool is_input, CeedElemRestriction *blk_restr, 204fc1f125SJeremy L Thompson CeedVector *e_vecs_full, CeedVector *e_vecs, CeedVector *q_vecs, 214fc1f125SJeremy L Thompson CeedInt start_e, CeedInt num_fields, CeedInt Q) { 22ebbcc8a3SJeremy L Thompson CeedInt ierr, num_comp, size, P; 23aedaa0e5Sjeremylt Ceed ceed; 24e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 25d1bcdac9Sjeremylt CeedBasis basis; 26d1bcdac9Sjeremylt CeedElemRestriction r; 27d1d35e2fSjeremylt CeedOperatorField *op_fields; 28d1d35e2fSjeremylt CeedQFunctionField *qf_fields; 294fc1f125SJeremy L Thompson if (is_input) { 307e7773b5SJeremy L Thompson ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); 31e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 327e7773b5SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); 33e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 344fc1f125SJeremy L Thompson } else { 354fc1f125SJeremy L Thompson ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); 364fc1f125SJeremy L Thompson CeedChkBackend(ierr); 374fc1f125SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); 384fc1f125SJeremy L Thompson CeedChkBackend(ierr); 39fe2413ffSjeremylt } 40d1d35e2fSjeremylt const CeedInt blk_size = 8; 414a2e7687Sjeremylt 424a2e7687Sjeremylt // Loop over fields 434fc1f125SJeremy L Thompson for (CeedInt i=0; i<num_fields; i++) { 44d1d35e2fSjeremylt CeedEvalMode eval_mode; 45d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); 46d1d35e2fSjeremylt CeedChkBackend(ierr); 474a2e7687Sjeremylt 48d1d35e2fSjeremylt if (eval_mode != CEED_EVAL_WEIGHT) { 49d1d35e2fSjeremylt ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r); 50e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 51e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 52e79b91d9SJeremy L Thompson CeedSize l_size; 53e79b91d9SJeremy L Thompson CeedInt num_elem, elem_size, comp_stride; 54d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 55d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 56d1d35e2fSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 57d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 58bd33150aSjeremylt 593ac43b2cSJeremy L Thompson bool strided; 60e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr); 613ac43b2cSJeremy L Thompson if (strided) { 623ac43b2cSJeremy L Thompson CeedInt strides[3]; 63e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 64d1d35e2fSjeremylt ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size, 65d1d35e2fSjeremylt blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]); 66e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 673ac43b2cSJeremy L Thompson } else { 68bd33150aSjeremylt const CeedInt *offsets = NULL; 69bd33150aSjeremylt ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets); 70e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 71d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 72d1d35e2fSjeremylt ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size, 73d1d35e2fSjeremylt blk_size, num_comp, comp_stride, 74d1d35e2fSjeremylt l_size, CEED_MEM_HOST, 75bd33150aSjeremylt CEED_COPY_VALUES, offsets, 76d1d35e2fSjeremylt &blk_restr[i+start_e]); 77e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 78e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr); 793ac43b2cSJeremy L Thompson } 80d1d35e2fSjeremylt ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL, 814fc1f125SJeremy L Thompson &e_vecs_full[i+start_e]); 82e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 834a2e7687Sjeremylt } 844a2e7687Sjeremylt 85d1d35e2fSjeremylt switch(eval_mode) { 864a2e7687Sjeremylt case CEED_EVAL_NONE: 87d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 88d1d35e2fSjeremylt ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 89d1d35e2fSjeremylt CeedChkBackend(ierr); 90aedaa0e5Sjeremylt break; 91aedaa0e5Sjeremylt case CEED_EVAL_INTERP: 924a2e7687Sjeremylt case CEED_EVAL_GRAD: 93d1d35e2fSjeremylt ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 94d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr); 95ebbcc8a3SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &P); CeedChkBackend(ierr); 96ebbcc8a3SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChkBackend(ierr); 97ebbcc8a3SJeremy L Thompson ierr = CeedVectorCreate(ceed, P*num_comp*blk_size, &e_vecs[i]); 98e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 99d1d35e2fSjeremylt ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]); 100d1d35e2fSjeremylt CeedChkBackend(ierr); 1014a2e7687Sjeremylt break; 1024a2e7687Sjeremylt case CEED_EVAL_WEIGHT: // Only on input fields 103d1d35e2fSjeremylt ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr); 104d1d35e2fSjeremylt ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr); 105d1d35e2fSjeremylt ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 106d1d35e2fSjeremylt CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]); 107e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 108aedaa0e5Sjeremylt 1094a2e7687Sjeremylt break; 1104a2e7687Sjeremylt case CEED_EVAL_DIV: 1114d537eeaSYohann break; // Not implemented 1124a2e7687Sjeremylt case CEED_EVAL_CURL: 1134d537eeaSYohann break; // Not implemented 1144a2e7687Sjeremylt } 1154a2e7687Sjeremylt } 116e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1174a2e7687Sjeremylt } 1184a2e7687Sjeremylt 119f10650afSjeremylt //------------------------------------------------------------------------------ 120f10650afSjeremylt // Setup Operator 121f10650afSjeremylt //------------------------------------------------------------------------------ 1224a2e7687Sjeremylt static int CeedOperatorSetup_Blocked(CeedOperator op) { 1234a2e7687Sjeremylt int ierr; 1248c1105f8SJeremy L Thompson bool is_setup_done; 1258c1105f8SJeremy L Thompson ierr = CeedOperatorIsSetupDone(op, &is_setup_done); CeedChkBackend(ierr); 1268c1105f8SJeremy L Thompson if (is_setup_done) return CEED_ERROR_SUCCESS; 127aedaa0e5Sjeremylt Ceed ceed; 128e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 1294ce2993fSjeremylt CeedOperator_Blocked *impl; 130e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 1314ce2993fSjeremylt CeedQFunction qf; 132e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 133d1d35e2fSjeremylt CeedInt Q, num_input_fields, num_output_fields; 134e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 1350b454692Sjeremylt ierr = CeedQFunctionIsIdentity(qf, &impl->is_identity_qf); CeedChkBackend(ierr); 136d1d35e2fSjeremylt CeedOperatorField *op_input_fields, *op_output_fields; 1377e7773b5SJeremy L Thompson ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 1387e7773b5SJeremy L Thompson &num_output_fields, &op_output_fields); 139e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 140d1d35e2fSjeremylt CeedQFunctionField *qf_input_fields, *qf_output_fields; 1417e7773b5SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 1427e7773b5SJeremy L Thompson &qf_output_fields); 143e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1444a2e7687Sjeremylt 1454a2e7687Sjeremylt // Allocate 146d1d35e2fSjeremylt ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr); 147e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1484fc1f125SJeremy L Thompson ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs_full); 149e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1504a2e7687Sjeremylt 1514fc1f125SJeremy L Thompson ierr = CeedCalloc(CEED_FIELD_MAX, &impl->input_states); CeedChkBackend(ierr); 152bf4cb664SJeremy L Thompson ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_in); CeedChkBackend(ierr); 153bf4cb664SJeremy L Thompson ierr = CeedCalloc(CEED_FIELD_MAX, &impl->e_vecs_out); CeedChkBackend(ierr); 154bf4cb664SJeremy L Thompson ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_in); CeedChkBackend(ierr); 155bf4cb664SJeremy L Thompson ierr = CeedCalloc(CEED_FIELD_MAX, &impl->q_vecs_out); CeedChkBackend(ierr); 1564a2e7687Sjeremylt 1574fc1f125SJeremy L Thompson impl->num_inputs = num_input_fields; 1584fc1f125SJeremy L Thompson impl->num_outputs = num_output_fields; 159aedaa0e5Sjeremylt 1604a2e7687Sjeremylt // Set up infield and outfield pointer arrays 1614a2e7687Sjeremylt // Infields 1624fc1f125SJeremy L Thompson ierr = CeedOperatorSetupFields_Blocked(qf, op, true, impl->blk_restr, 1634fc1f125SJeremy L Thompson impl->e_vecs_full, impl->e_vecs_in, 164d1d35e2fSjeremylt impl->q_vecs_in, 0, 165d1d35e2fSjeremylt num_input_fields, Q); 166e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1674a2e7687Sjeremylt // Outfields 1684fc1f125SJeremy L Thompson ierr = CeedOperatorSetupFields_Blocked(qf, op, false, impl->blk_restr, 1694fc1f125SJeremy L Thompson impl->e_vecs_full, impl->e_vecs_out, 170d1d35e2fSjeremylt impl->q_vecs_out, num_input_fields, 171d1d35e2fSjeremylt num_output_fields, Q); 172e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 173aedaa0e5Sjeremylt 17416911fdaSjeremylt // Identity QFunctions 1750b454692Sjeremylt if (impl->is_identity_qf) { 176d1d35e2fSjeremylt CeedEvalMode in_mode, out_mode; 177d1d35e2fSjeremylt CeedQFunctionField *in_fields, *out_fields; 1787e7773b5SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &in_fields, NULL, &out_fields); 179e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1800b454692Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(in_fields[0], &in_mode); 181d1d35e2fSjeremylt CeedChkBackend(ierr); 1820b454692Sjeremylt ierr = CeedQFunctionFieldGetEvalMode(out_fields[0], &out_mode); 183d1d35e2fSjeremylt CeedChkBackend(ierr); 184d1d35e2fSjeremylt 1850b454692Sjeremylt if (in_mode == CEED_EVAL_NONE && out_mode == CEED_EVAL_NONE) { 1860b454692Sjeremylt impl->is_identity_restr_op = true; 1870b454692Sjeremylt } else { 1880b454692Sjeremylt ierr = CeedVectorDestroy(&impl->q_vecs_out[0]); CeedChkBackend(ierr); 1890b454692Sjeremylt impl->q_vecs_out[0] = impl->q_vecs_in[0]; 1900b454692Sjeremylt ierr = CeedVectorAddReference(impl->q_vecs_in[0]); CeedChkBackend(ierr); 19116911fdaSjeremylt } 19216911fdaSjeremylt } 19316911fdaSjeremylt 194e15f9bd0SJeremy L Thompson ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr); 1954a2e7687Sjeremylt 196e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1974a2e7687Sjeremylt } 1984a2e7687Sjeremylt 199f10650afSjeremylt //------------------------------------------------------------------------------ 200f10650afSjeremylt // Setup Operator Inputs 201f10650afSjeremylt //------------------------------------------------------------------------------ 202d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Blocked(CeedInt num_input_fields, 203d1d35e2fSjeremylt CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 2044fc1f125SJeremy L Thompson CeedVector in_vec, bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 205a0162de9SJeremy L Thompson CeedOperator_Blocked *impl, CeedRequest *request) { 2061d102b48SJeremy L Thompson CeedInt ierr; 207d1d35e2fSjeremylt CeedEvalMode eval_mode; 208d1bcdac9Sjeremylt CeedVector vec; 20916c359e6Sjeremylt uint64_t state; 2104a2e7687Sjeremylt 211d1d35e2fSjeremylt for (CeedInt i=0; i<num_input_fields; i++) { 2121d102b48SJeremy L Thompson // Get input vector 213d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 214d1d35e2fSjeremylt CeedChkBackend(ierr); 2151d102b48SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 216d1d35e2fSjeremylt if (skip_active) 2171d102b48SJeremy L Thompson continue; 2181d102b48SJeremy L Thompson else 219d1d35e2fSjeremylt vec = in_vec; 2201d102b48SJeremy L Thompson } 2211d102b48SJeremy L Thompson 222d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 223e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 224d1d35e2fSjeremylt if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 2254a2e7687Sjeremylt } else { 2264a2e7687Sjeremylt // Restrict 227e15f9bd0SJeremy L Thompson ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr); 2284fc1f125SJeremy L Thompson if (state != impl->input_states[i] || vec == in_vec) { 229d1d35e2fSjeremylt ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE, 2304fc1f125SJeremy L Thompson vec, impl->e_vecs_full[i], request); 231e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 2324fc1f125SJeremy L Thompson impl->input_states[i] = state; 23316c359e6Sjeremylt } 2344a2e7687Sjeremylt // Get evec 2354fc1f125SJeremy L Thompson ierr = CeedVectorGetArrayRead(impl->e_vecs_full[i], CEED_MEM_HOST, 2364fc1f125SJeremy L Thompson (const CeedScalar **) &e_data_full[i]); 237e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 2384a2e7687Sjeremylt } 2394a2e7687Sjeremylt } 240e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2414a2e7687Sjeremylt } 2424a2e7687Sjeremylt 243f10650afSjeremylt //------------------------------------------------------------------------------ 244f10650afSjeremylt // Input Basis Action 245f10650afSjeremylt //------------------------------------------------------------------------------ 2461d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q, 247d1d35e2fSjeremylt CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 248d1d35e2fSjeremylt CeedInt num_input_fields, CeedInt blk_size, bool skip_active, 2494fc1f125SJeremy L Thompson CeedScalar *e_data_full[2*CEED_FIELD_MAX], CeedOperator_Blocked *impl) { 2501d102b48SJeremy L Thompson CeedInt ierr; 251d1d35e2fSjeremylt CeedInt dim, elem_size, size; 252d1d35e2fSjeremylt CeedElemRestriction elem_restr; 253d1d35e2fSjeremylt CeedEvalMode eval_mode; 2541d102b48SJeremy L Thompson CeedBasis basis; 2551d102b48SJeremy L Thompson 256d1d35e2fSjeremylt for (CeedInt i=0; i<num_input_fields; i++) { 2571d102b48SJeremy L Thompson // Skip active input 258d1d35e2fSjeremylt if (skip_active) { 2591d102b48SJeremy L Thompson CeedVector vec; 260d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 261d1d35e2fSjeremylt CeedChkBackend(ierr); 2621d102b48SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) 2631d102b48SJeremy L Thompson continue; 2641d102b48SJeremy L Thompson } 2651d102b48SJeremy L Thompson 266d1d35e2fSjeremylt // Get elem_size, eval_mode, size 267d1d35e2fSjeremylt ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr); 268e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 269d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 270e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 271d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 272e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 273d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 274d1d35e2fSjeremylt CeedChkBackend(ierr); 2754a2e7687Sjeremylt // Basis action 276d1d35e2fSjeremylt switch(eval_mode) { 2774a2e7687Sjeremylt case CEED_EVAL_NONE: 278d1d35e2fSjeremylt ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST, 2794fc1f125SJeremy L Thompson CEED_USE_POINTER, &e_data_full[i][e*Q*size]); 280a0162de9SJeremy L Thompson CeedChkBackend(ierr); 2814a2e7687Sjeremylt break; 2824a2e7687Sjeremylt case CEED_EVAL_INTERP: 283d1d35e2fSjeremylt ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 284e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 285d1d35e2fSjeremylt ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 2864fc1f125SJeremy L Thompson CEED_USE_POINTER, &e_data_full[i][e*elem_size*size]); 287e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 288d1d35e2fSjeremylt ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 289d1d35e2fSjeremylt CEED_EVAL_INTERP, impl->e_vecs_in[i], 290d1d35e2fSjeremylt impl->q_vecs_in[i]); CeedChkBackend(ierr); 2914a2e7687Sjeremylt break; 2924a2e7687Sjeremylt case CEED_EVAL_GRAD: 293d1d35e2fSjeremylt ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 294e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 295e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 296d1d35e2fSjeremylt ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST, 2974fc1f125SJeremy L Thompson CEED_USE_POINTER, &e_data_full[i][e*elem_size*size/dim]); 298e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 299d1d35e2fSjeremylt ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE, 300d1d35e2fSjeremylt CEED_EVAL_GRAD, impl->e_vecs_in[i], 301d1d35e2fSjeremylt impl->q_vecs_in[i]); CeedChkBackend(ierr); 3024a2e7687Sjeremylt break; 3034a2e7687Sjeremylt case CEED_EVAL_WEIGHT: 3044a2e7687Sjeremylt break; // No action 305bbfacfcdSjeremylt // LCOV_EXCL_START 3064a2e7687Sjeremylt case CEED_EVAL_DIV: 3071d102b48SJeremy L Thompson case CEED_EVAL_CURL: { 308d1d35e2fSjeremylt ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis); 309e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 3101d102b48SJeremy L Thompson Ceed ceed; 311e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 312e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 313e15f9bd0SJeremy L Thompson "Ceed evaluation mode not implemented"); 314bbfacfcdSjeremylt // LCOV_EXCL_STOP 3154a2e7687Sjeremylt } 3164a2e7687Sjeremylt } 31789c6efa4Sjeremylt } 318e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31989c6efa4Sjeremylt } 3204a2e7687Sjeremylt 321f10650afSjeremylt //------------------------------------------------------------------------------ 322f10650afSjeremylt // Output Basis Action 323f10650afSjeremylt //------------------------------------------------------------------------------ 3241d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q, 325d1d35e2fSjeremylt CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, 326d1d35e2fSjeremylt CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields, 3274fc1f125SJeremy L Thompson CeedOperator op, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 328a0162de9SJeremy L Thompson CeedOperator_Blocked *impl) { 3291d102b48SJeremy L Thompson CeedInt ierr; 330d1d35e2fSjeremylt CeedInt dim, elem_size, size; 331d1d35e2fSjeremylt CeedElemRestriction elem_restr; 332d1d35e2fSjeremylt CeedEvalMode eval_mode; 3331d102b48SJeremy L Thompson CeedBasis basis; 3341d102b48SJeremy L Thompson 335d1d35e2fSjeremylt for (CeedInt i=0; i<num_output_fields; i++) { 336d1d35e2fSjeremylt // Get elem_size, eval_mode, size 337d1d35e2fSjeremylt ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr); 338e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 339d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size); 340e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 341d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 342e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 343d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 344e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 3454a2e7687Sjeremylt // Basis action 346d1d35e2fSjeremylt switch(eval_mode) { 3474a2e7687Sjeremylt case CEED_EVAL_NONE: 3484a2e7687Sjeremylt break; // No action 3494a2e7687Sjeremylt case CEED_EVAL_INTERP: 350d1d35e2fSjeremylt ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 351e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 352d1d35e2fSjeremylt ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 3534fc1f125SJeremy L Thompson CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size]); 354e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 355d1d35e2fSjeremylt ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 356d1d35e2fSjeremylt CEED_EVAL_INTERP, impl->q_vecs_out[i], 357d1d35e2fSjeremylt impl->e_vecs_out[i]); CeedChkBackend(ierr); 3584a2e7687Sjeremylt break; 3594a2e7687Sjeremylt case CEED_EVAL_GRAD: 360d1d35e2fSjeremylt ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis); 361e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 362e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 363d1d35e2fSjeremylt ierr = CeedVectorSetArray(impl->e_vecs_out[i], CEED_MEM_HOST, 3644fc1f125SJeremy L Thompson CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*elem_size*size/dim]); 365e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 366d1d35e2fSjeremylt ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE, 367d1d35e2fSjeremylt CEED_EVAL_GRAD, impl->q_vecs_out[i], 368d1d35e2fSjeremylt impl->e_vecs_out[i]); CeedChkBackend(ierr); 3694a2e7687Sjeremylt break; 370c042f62fSJeremy L Thompson // LCOV_EXCL_START 371bbfacfcdSjeremylt case CEED_EVAL_WEIGHT: { 3724ce2993fSjeremylt Ceed ceed; 373e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 374e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 375e15f9bd0SJeremy L Thompson "CEED_EVAL_WEIGHT cannot be an output " 3761d102b48SJeremy L Thompson "evaluation mode"); 3774ce2993fSjeremylt } 3784a2e7687Sjeremylt case CEED_EVAL_DIV: 3791d102b48SJeremy L Thompson case CEED_EVAL_CURL: { 3801d102b48SJeremy L Thompson Ceed ceed; 381e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 382e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 383e15f9bd0SJeremy L Thompson "Ceed evaluation mode not implemented"); 384bbfacfcdSjeremylt // LCOV_EXCL_STOP 3854a2e7687Sjeremylt } 38689c6efa4Sjeremylt } 38789c6efa4Sjeremylt } 388e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3891d102b48SJeremy L Thompson } 3901d102b48SJeremy L Thompson 391f10650afSjeremylt //------------------------------------------------------------------------------ 392f10650afSjeremylt // Restore Input Vectors 393f10650afSjeremylt //------------------------------------------------------------------------------ 394d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Blocked(CeedInt num_input_fields, 395d1d35e2fSjeremylt CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, 3964fc1f125SJeremy L Thompson bool skip_active, CeedScalar *e_data_full[2*CEED_FIELD_MAX], 397a0162de9SJeremy L Thompson CeedOperator_Blocked *impl) { 3981d102b48SJeremy L Thompson CeedInt ierr; 399d1d35e2fSjeremylt CeedEvalMode eval_mode; 4001d102b48SJeremy L Thompson 401d1d35e2fSjeremylt for (CeedInt i=0; i<num_input_fields; i++) { 4021d102b48SJeremy L Thompson // Skip active inputs 403d1d35e2fSjeremylt if (skip_active) { 4041d102b48SJeremy L Thompson CeedVector vec; 405d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 406d1d35e2fSjeremylt CeedChkBackend(ierr); 4071d102b48SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) 4081d102b48SJeremy L Thompson continue; 4091d102b48SJeremy L Thompson } 410d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode); 411e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 412d1d35e2fSjeremylt if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 4131d102b48SJeremy L Thompson } else { 4144fc1f125SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(impl->e_vecs_full[i], 4154fc1f125SJeremy L Thompson (const CeedScalar **) &e_data_full[i]); 416e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 4171d102b48SJeremy L Thompson } 4181d102b48SJeremy L Thompson } 419e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4201d102b48SJeremy L Thompson } 4211d102b48SJeremy L Thompson 422f10650afSjeremylt //------------------------------------------------------------------------------ 423f10650afSjeremylt // Operator Apply 424f10650afSjeremylt //------------------------------------------------------------------------------ 425d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector in_vec, 426d1d35e2fSjeremylt CeedVector out_vec, 4271d102b48SJeremy L Thompson CeedRequest *request) { 4281d102b48SJeremy L Thompson int ierr; 4291d102b48SJeremy L Thompson CeedOperator_Blocked *impl; 430e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 431d1d35e2fSjeremylt const CeedInt blk_size = 8; 432d1d35e2fSjeremylt CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 433d1d35e2fSjeremylt ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 434e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 435d1d35e2fSjeremylt CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 4361d102b48SJeremy L Thompson CeedQFunction qf; 437e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 438d1d35e2fSjeremylt CeedOperatorField *op_input_fields, *op_output_fields; 4397e7773b5SJeremy L Thompson ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 4407e7773b5SJeremy L Thompson &num_output_fields, &op_output_fields); 441e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 442d1d35e2fSjeremylt CeedQFunctionField *qf_input_fields, *qf_output_fields; 4437e7773b5SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 4447e7773b5SJeremy L Thompson &qf_output_fields); 445e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 446d1d35e2fSjeremylt CeedEvalMode eval_mode; 4471d102b48SJeremy L Thompson CeedVector vec; 4484fc1f125SJeremy L Thompson CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0}; 4491d102b48SJeremy L Thompson 4501d102b48SJeremy L Thompson // Setup 451e15f9bd0SJeremy L Thompson ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 4521d102b48SJeremy L Thompson 4530b454692Sjeremylt // Restriction only operator 4540b454692Sjeremylt if (impl->is_identity_restr_op) { 4550b454692Sjeremylt ierr = CeedElemRestrictionApply(impl->blk_restr[0], CEED_NOTRANSPOSE, in_vec, 4564fc1f125SJeremy L Thompson impl->e_vecs_full[0], request); CeedChkBackend(ierr); 4570b454692Sjeremylt ierr = CeedElemRestrictionApply(impl->blk_restr[1], CEED_TRANSPOSE, 4584fc1f125SJeremy L Thompson impl->e_vecs_full[0], out_vec, request); CeedChkBackend(ierr); 4590b454692Sjeremylt return CEED_ERROR_SUCCESS; 4600b454692Sjeremylt } 4610b454692Sjeremylt 4621d102b48SJeremy L Thompson // Input Evecs and Restriction 463d1d35e2fSjeremylt ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 4644fc1f125SJeremy L Thompson op_input_fields, in_vec, false, e_data_full, 465a0162de9SJeremy L Thompson impl, request); CeedChkBackend(ierr); 4661d102b48SJeremy L Thompson 4671d102b48SJeremy L Thompson // Output Evecs 468d1d35e2fSjeremylt for (CeedInt i=0; i<num_output_fields; i++) { 4699c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(impl->e_vecs_full[i+impl->num_inputs], 4709c774eddSJeremy L Thompson CEED_MEM_HOST, &e_data_full[i + num_input_fields]); 4719c774eddSJeremy L Thompson CeedChkBackend(ierr); 4721d102b48SJeremy L Thompson } 4731d102b48SJeremy L Thompson 4741d102b48SJeremy L Thompson // Loop through elements 475d1d35e2fSjeremylt for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 4761d102b48SJeremy L Thompson // Output pointers 477d1d35e2fSjeremylt for (CeedInt i=0; i<num_output_fields; i++) { 478d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode); 479e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 480d1d35e2fSjeremylt if (eval_mode == CEED_EVAL_NONE) { 481d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 482e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 483d1d35e2fSjeremylt ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST, 4844fc1f125SJeremy L Thompson CEED_USE_POINTER, &e_data_full[i + num_input_fields][e*Q*size]); 485e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 4861d102b48SJeremy L Thompson } 4871d102b48SJeremy L Thompson } 4881d102b48SJeremy L Thompson 48916911fdaSjeremylt // Input basis apply 490d1d35e2fSjeremylt ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 4914fc1f125SJeremy L Thompson num_input_fields, blk_size, false, e_data_full, 492a0162de9SJeremy L Thompson impl); CeedChkBackend(ierr); 49316911fdaSjeremylt 4941d102b48SJeremy L Thompson // Q function 4950b454692Sjeremylt if (!impl->is_identity_qf) { 496d1d35e2fSjeremylt ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 497e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 49816911fdaSjeremylt } 4991d102b48SJeremy L Thompson 5001d102b48SJeremy L Thompson // Output basis apply 501d1d35e2fSjeremylt ierr = CeedOperatorOutputBasis_Blocked(e, Q, qf_output_fields, op_output_fields, 502d1d35e2fSjeremylt blk_size, num_input_fields, 5034fc1f125SJeremy L Thompson num_output_fields, op, e_data_full, impl); 504e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 5051d102b48SJeremy L Thompson } 50689c6efa4Sjeremylt 50789c6efa4Sjeremylt // Output restriction 508d1d35e2fSjeremylt for (CeedInt i=0; i<num_output_fields; i++) { 50989c6efa4Sjeremylt // Restore evec 5104fc1f125SJeremy L Thompson ierr = CeedVectorRestoreArray(impl->e_vecs_full[i+impl->num_inputs], 5119c774eddSJeremy L Thompson &e_data_full[i + num_input_fields]); 5129c774eddSJeremy L Thompson CeedChkBackend(ierr); 513d1bcdac9Sjeremylt // Get output vector 514d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 515e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 51689c6efa4Sjeremylt // Active 517d1bcdac9Sjeremylt if (vec == CEED_VECTOR_ACTIVE) 518d1d35e2fSjeremylt vec = out_vec; 5194a2e7687Sjeremylt // Restrict 5204fc1f125SJeremy L Thompson ierr = CeedElemRestrictionApply(impl->blk_restr[i+impl->num_inputs], 5214fc1f125SJeremy L Thompson CEED_TRANSPOSE, impl->e_vecs_full[i+impl->num_inputs], 522e15f9bd0SJeremy L Thompson vec, request); CeedChkBackend(ierr); 52389c6efa4Sjeremylt 5244a2e7687Sjeremylt } 5254a2e7687Sjeremylt 5264a2e7687Sjeremylt // Restore input arrays 527d1d35e2fSjeremylt ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 5284fc1f125SJeremy L Thompson op_input_fields, false, e_data_full, impl); CeedChkBackend(ierr); 5291d102b48SJeremy L Thompson 530e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5311d102b48SJeremy L Thompson } 5321d102b48SJeremy L Thompson 533f10650afSjeremylt //------------------------------------------------------------------------------ 53470a7ffb3SJeremy L Thompson // Core code for assembling linear QFunction 535f10650afSjeremylt //------------------------------------------------------------------------------ 53670a7ffb3SJeremy L Thompson static inline int CeedOperatorLinearAssembleQFunctionCore_Blocked( 537a0162de9SJeremy L Thompson CeedOperator op, bool build_objects, CeedVector *assembled, 538a0162de9SJeremy L Thompson CeedElemRestriction *rstr, CeedRequest *request) { 5391d102b48SJeremy L Thompson int ierr; 5401d102b48SJeremy L Thompson CeedOperator_Blocked *impl; 541e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 542d1d35e2fSjeremylt const CeedInt blk_size = 8; 543d1d35e2fSjeremylt CeedInt Q, num_input_fields, num_output_fields, num_elem, size; 544d1d35e2fSjeremylt ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr); 545e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr); 546d1d35e2fSjeremylt CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size); 5471d102b48SJeremy L Thompson CeedQFunction qf; 548e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 549d1d35e2fSjeremylt CeedOperatorField *op_input_fields, *op_output_fields; 5507e7773b5SJeremy L Thompson ierr = CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, 5517e7773b5SJeremy L Thompson &num_output_fields, &op_output_fields); 552e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 553d1d35e2fSjeremylt CeedQFunctionField *qf_input_fields, *qf_output_fields; 5547e7773b5SJeremy L Thompson ierr = CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, 5557e7773b5SJeremy L Thompson &qf_output_fields); 556e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 5574fc1f125SJeremy L Thompson CeedVector vec, l_vec = impl->qf_l_vec; 5584fc1f125SJeremy L Thompson CeedInt num_active_in = impl->num_active_in, 5594fc1f125SJeremy L Thompson num_active_out = impl->num_active_out; 560bb219a0fSJeremy L Thompson CeedVector *active_in = impl->qf_active_in; 5611d102b48SJeremy L Thompson CeedScalar *a, *tmp; 5621d102b48SJeremy L Thompson Ceed ceed; 563e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 5644fc1f125SJeremy L Thompson CeedScalar *e_data_full[2*CEED_FIELD_MAX] = {0}; 5651d102b48SJeremy L Thompson 5661d102b48SJeremy L Thompson // Setup 567e15f9bd0SJeremy L Thompson ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr); 5681d102b48SJeremy L Thompson 56916911fdaSjeremylt // Check for identity 5700b454692Sjeremylt if (impl->is_identity_qf) 57116911fdaSjeremylt // LCOV_EXCL_START 572e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 573d1d35e2fSjeremylt "Assembling identity QFunctions not supported"); 57416911fdaSjeremylt // LCOV_EXCL_STOP 57516911fdaSjeremylt 5761d102b48SJeremy L Thompson // Input Evecs and Restriction 577d1d35e2fSjeremylt ierr = CeedOperatorSetupInputs_Blocked(num_input_fields, qf_input_fields, 5784fc1f125SJeremy L Thompson op_input_fields, NULL, true, e_data_full, 579a0162de9SJeremy L Thompson impl, request); CeedChkBackend(ierr); 5801d102b48SJeremy L Thompson 5811d102b48SJeremy L Thompson // Count number of active input fields 582bb219a0fSJeremy L Thompson if (!num_active_in) { 583d1d35e2fSjeremylt for (CeedInt i=0; i<num_input_fields; i++) { 5841d102b48SJeremy L Thompson // Get input vector 585d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec); 586d1d35e2fSjeremylt CeedChkBackend(ierr); 5871d102b48SJeremy L Thompson // Check if active input 5881d102b48SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 589d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size); 590e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 591d1d35e2fSjeremylt ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr); 592d1d35e2fSjeremylt ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp); 593d1d35e2fSjeremylt CeedChkBackend(ierr); 594d1d35e2fSjeremylt ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr); 5951d102b48SJeremy L Thompson for (CeedInt field=0; field<size; field++) { 596d1d35e2fSjeremylt ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]); 597e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 598d1d35e2fSjeremylt ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST, 599d1d35e2fSjeremylt CEED_USE_POINTER, &tmp[field*Q*blk_size]); 600e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 6011d102b48SJeremy L Thompson } 602d1d35e2fSjeremylt num_active_in += size; 603d1d35e2fSjeremylt ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr); 6041d102b48SJeremy L Thompson } 6051d102b48SJeremy L Thompson } 6064fc1f125SJeremy L Thompson impl->num_active_in = num_active_in; 607bb219a0fSJeremy L Thompson impl->qf_active_in = active_in; 608bb219a0fSJeremy L Thompson } 6091d102b48SJeremy L Thompson 6101d102b48SJeremy L Thompson // Count number of active output fields 611bb219a0fSJeremy L Thompson if (!num_active_out) { 612d1d35e2fSjeremylt for (CeedInt i=0; i<num_output_fields; i++) { 6131d102b48SJeremy L Thompson // Get output vector 614d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec); 615e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 6161d102b48SJeremy L Thompson // Check if active output 6171d102b48SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 618d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size); 619e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 620d1d35e2fSjeremylt num_active_out += size; 6211d102b48SJeremy L Thompson } 6221d102b48SJeremy L Thompson } 6234fc1f125SJeremy L Thompson impl->num_active_out = num_active_out; 624bb219a0fSJeremy L Thompson } 6251d102b48SJeremy L Thompson 6261d102b48SJeremy L Thompson // Check sizes 627d1d35e2fSjeremylt if (!num_active_in || !num_active_out) 6281d102b48SJeremy L Thompson // LCOV_EXCL_START 629e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 630e15f9bd0SJeremy L Thompson "Cannot assemble QFunction without active inputs " 6311d102b48SJeremy L Thompson "and outputs"); 6321d102b48SJeremy L Thompson // LCOV_EXCL_STOP 6331d102b48SJeremy L Thompson 634d1d35e2fSjeremylt // Setup Lvec 6354fc1f125SJeremy L Thompson if (!l_vec) { 636d1d35e2fSjeremylt ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out, 6374fc1f125SJeremy L Thompson &l_vec); CeedChkBackend(ierr); 6384fc1f125SJeremy L Thompson impl->qf_l_vec = l_vec; 639bb219a0fSJeremy L Thompson } 6409c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(l_vec, CEED_MEM_HOST, &a); CeedChkBackend(ierr); 6411d102b48SJeremy L Thompson 64270a7ffb3SJeremy L Thompson // Build objects if needed 643d1d35e2fSjeremylt CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q}; 64470a7ffb3SJeremy L Thompson if (build_objects) { 64570a7ffb3SJeremy L Thompson // Create output restriction 646d1d35e2fSjeremylt ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q, 647d1d35e2fSjeremylt num_active_in*num_active_out, 648d1d35e2fSjeremylt num_active_in*num_active_out*num_elem*Q, 649e15f9bd0SJeremy L Thompson strides, rstr); CeedChkBackend(ierr); 6501d102b48SJeremy L Thompson // Create assembled vector 651d1d35e2fSjeremylt ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out, 652e15f9bd0SJeremy L Thompson assembled); CeedChkBackend(ierr); 65370a7ffb3SJeremy L Thompson } 6541d102b48SJeremy L Thompson 6551d102b48SJeremy L Thompson // Loop through elements 656d1d35e2fSjeremylt for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) { 6571d102b48SJeremy L Thompson // Input basis apply 658d1d35e2fSjeremylt ierr = CeedOperatorInputBasis_Blocked(e, Q, qf_input_fields, op_input_fields, 6594fc1f125SJeremy L Thompson num_input_fields, blk_size, true, e_data_full, 660a0162de9SJeremy L Thompson impl); CeedChkBackend(ierr); 6611d102b48SJeremy L Thompson 6621d102b48SJeremy L Thompson // Assemble QFunction 663d1d35e2fSjeremylt for (CeedInt in=0; in<num_active_in; in++) { 6641d102b48SJeremy L Thompson // Set Inputs 665d1d35e2fSjeremylt ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr); 666d1d35e2fSjeremylt if (num_active_in > 1) { 667d1d35e2fSjeremylt ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in], 668e15f9bd0SJeremy L Thompson 0.0); CeedChkBackend(ierr); 66942ea3801Sjeremylt } 6701d102b48SJeremy L Thompson // Set Outputs 671d1d35e2fSjeremylt for (CeedInt out=0; out<num_output_fields; out++) { 6721d102b48SJeremy L Thompson // Get output vector 673d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 674e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 6751d102b48SJeremy L Thompson // Check if active output 6761d102b48SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 677d1d35e2fSjeremylt CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, 678e15f9bd0SJeremy L Thompson CEED_USE_POINTER, a); CeedChkBackend(ierr); 679d1d35e2fSjeremylt ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size); 680e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 681d1d35e2fSjeremylt a += size*Q*blk_size; // Advance the pointer by the size of the output 6821d102b48SJeremy L Thompson } 6831d102b48SJeremy L Thompson } 6841d102b48SJeremy L Thompson // Apply QFunction 685d1d35e2fSjeremylt ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out); 686e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 6874a2e7687Sjeremylt } 6884a2e7687Sjeremylt } 6894a2e7687Sjeremylt 6901d102b48SJeremy L Thompson // Un-set output Qvecs to prevent accidental overwrite of Assembled 691d1d35e2fSjeremylt for (CeedInt out=0; out<num_output_fields; out++) { 6921d102b48SJeremy L Thompson // Get output vector 693d1d35e2fSjeremylt ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec); 694e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 6951d102b48SJeremy L Thompson // Check if active output 6961d102b48SJeremy L Thompson if (vec == CEED_VECTOR_ACTIVE) { 697d1d35e2fSjeremylt CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES, 698e15f9bd0SJeremy L Thompson NULL); CeedChkBackend(ierr); 6991d102b48SJeremy L Thompson } 7001d102b48SJeremy L Thompson } 7011d102b48SJeremy L Thompson 7021d102b48SJeremy L Thompson // Restore input arrays 703d1d35e2fSjeremylt ierr = CeedOperatorRestoreInputs_Blocked(num_input_fields, qf_input_fields, 7044fc1f125SJeremy L Thompson op_input_fields, true, e_data_full, impl); CeedChkBackend(ierr); 7051d102b48SJeremy L Thompson 7061d102b48SJeremy L Thompson // Output blocked restriction 7074fc1f125SJeremy L Thompson ierr = CeedVectorRestoreArray(l_vec, &a); CeedChkBackend(ierr); 708e15f9bd0SJeremy L Thompson ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr); 709bb219a0fSJeremy L Thompson CeedElemRestriction blk_rstr = impl->qf_blk_rstr; 710bb219a0fSJeremy L Thompson if (!impl->qf_blk_rstr) { 711d1d35e2fSjeremylt ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size, 712d1d35e2fSjeremylt num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q, 713d1d35e2fSjeremylt strides, &blk_rstr); CeedChkBackend(ierr); 714bb219a0fSJeremy L Thompson impl->qf_blk_rstr = blk_rstr; 715bb219a0fSJeremy L Thompson } 7164fc1f125SJeremy L Thompson ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, l_vec, *assembled, 717e15f9bd0SJeremy L Thompson request); CeedChkBackend(ierr); 7181d102b48SJeremy L Thompson 719e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 7204a2e7687Sjeremylt } 7214a2e7687Sjeremylt 722f10650afSjeremylt //------------------------------------------------------------------------------ 72370a7ffb3SJeremy L Thompson // Assemble Linear QFunction 72470a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------ 72570a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op, 72670a7ffb3SJeremy L Thompson CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 72770a7ffb3SJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, true, assembled, 72870a7ffb3SJeremy L Thompson rstr, request); 72970a7ffb3SJeremy L Thompson } 73070a7ffb3SJeremy L Thompson 73170a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------ 73270a7ffb3SJeremy L Thompson // Update Assembled Linear QFunction 73370a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------ 73470a7ffb3SJeremy L Thompson static int CeedOperatorLinearAssembleQFunctionUpdate_Blocked(CeedOperator op, 73570a7ffb3SJeremy L Thompson CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 73670a7ffb3SJeremy L Thompson return CeedOperatorLinearAssembleQFunctionCore_Blocked(op, false, &assembled, 73770a7ffb3SJeremy L Thompson &rstr, request); 73870a7ffb3SJeremy L Thompson } 73970a7ffb3SJeremy L Thompson 74070a7ffb3SJeremy L Thompson //------------------------------------------------------------------------------ 741f10650afSjeremylt // Operator Destroy 742f10650afSjeremylt //------------------------------------------------------------------------------ 743f10650afSjeremylt static int CeedOperatorDestroy_Blocked(CeedOperator op) { 744f10650afSjeremylt int ierr; 745f10650afSjeremylt CeedOperator_Blocked *impl; 746e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 747f10650afSjeremylt 7484fc1f125SJeremy L Thompson for (CeedInt i=0; i<impl->num_inputs+impl->num_outputs; i++) { 749d1d35e2fSjeremylt ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr); 7504fc1f125SJeremy L Thompson ierr = CeedVectorDestroy(&impl->e_vecs_full[i]); CeedChkBackend(ierr); 751f10650afSjeremylt } 752d1d35e2fSjeremylt ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr); 7534fc1f125SJeremy L Thompson ierr = CeedFree(&impl->e_vecs_full); CeedChkBackend(ierr); 7544fc1f125SJeremy L Thompson ierr = CeedFree(&impl->input_states); CeedChkBackend(ierr); 755f10650afSjeremylt 7564fc1f125SJeremy L Thompson for (CeedInt i=0; i<impl->num_inputs; i++) { 757d1d35e2fSjeremylt ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr); 758d1d35e2fSjeremylt ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr); 759f10650afSjeremylt } 760d1d35e2fSjeremylt ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr); 761d1d35e2fSjeremylt ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr); 762f10650afSjeremylt 7634fc1f125SJeremy L Thompson for (CeedInt i=0; i<impl->num_outputs; i++) { 764d1d35e2fSjeremylt ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr); 765d1d35e2fSjeremylt ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr); 766f10650afSjeremylt } 767d1d35e2fSjeremylt ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr); 768d1d35e2fSjeremylt ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr); 769f10650afSjeremylt 770bb219a0fSJeremy L Thompson // QFunction assembly data 7714fc1f125SJeremy L Thompson for (CeedInt i=0; i<impl->num_active_in; i++) { 772bb219a0fSJeremy L Thompson ierr = CeedVectorDestroy(&impl->qf_active_in[i]); CeedChkBackend(ierr); 773bb219a0fSJeremy L Thompson } 774bb219a0fSJeremy L Thompson ierr = CeedFree(&impl->qf_active_in); CeedChkBackend(ierr); 7754fc1f125SJeremy L Thompson ierr = CeedVectorDestroy(&impl->qf_l_vec); CeedChkBackend(ierr); 776bb219a0fSJeremy L Thompson ierr = CeedElemRestrictionDestroy(&impl->qf_blk_rstr); CeedChkBackend(ierr); 777bb219a0fSJeremy L Thompson 778e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 779e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 780f10650afSjeremylt } 781f10650afSjeremylt 782f10650afSjeremylt //------------------------------------------------------------------------------ 783f10650afSjeremylt // Operator Create 784f10650afSjeremylt //------------------------------------------------------------------------------ 7854a2e7687Sjeremylt int CeedOperatorCreate_Blocked(CeedOperator op) { 7864a2e7687Sjeremylt int ierr; 787fe2413ffSjeremylt Ceed ceed; 788e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 7894ce2993fSjeremylt CeedOperator_Blocked *impl; 7904a2e7687Sjeremylt 791e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 792e15f9bd0SJeremy L Thompson ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 793fe2413ffSjeremylt 79480ac2e43SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", 79580ac2e43SJeremy L Thompson CeedOperatorLinearAssembleQFunction_Blocked); 796e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 79770a7ffb3SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Operator", op, 79870a7ffb3SJeremy L Thompson "LinearAssembleQFunctionUpdate", 79970a7ffb3SJeremy L Thompson CeedOperatorLinearAssembleQFunctionUpdate_Blocked); 80070a7ffb3SJeremy L Thompson CeedChkBackend(ierr); 801cae8b89aSjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 802e15f9bd0SJeremy L Thompson CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr); 803fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 804e15f9bd0SJeremy L Thompson CeedOperatorDestroy_Blocked); CeedChkBackend(ierr); 805e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8064a2e7687Sjeremylt } 807f10650afSjeremylt //------------------------------------------------------------------------------ 808