xref: /honee/problems/sgs_dd_model.c (revision 8340219bafa41388624676c9e984cd3223bcfb2c)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 
4 #include "../qfunctions/sgs_dd_model.h"
5 
6 #include <petscdmplex.h>
7 
8 #include <navierstokes.h>
9 #include <sgs_model_torch.h>
10 
11 typedef struct {
12   CeedElemRestriction      elem_restr_grid_aniso, elem_restr_sgs;
13   CeedVector               grid_aniso_ceed;
14   CeedQFunctionContext     sgsdd_qfctx, ifunction_qfctx;
15   SGSModelDDImplementation sgs_dd_model_implementation;
16 } *SgsDDSetupData;
17 
18 #define GRAD_VELO_PROJ_KEY "Gradient of Velocity Projection"
19 
20 PetscErrorCode SgsDDSetupDataDestroy(SgsDDSetupData sgs_dd_setup_data) {
21   Ceed ceed;
22 
23   PetscFunctionBeginUser;
24   PetscCall(CeedElemRestrictionGetCeed(sgs_dd_setup_data->elem_restr_sgs, &ceed));
25 
26   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&sgs_dd_setup_data->elem_restr_grid_aniso));
27   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&sgs_dd_setup_data->elem_restr_sgs));
28   PetscCallCeed(ceed, CeedVectorDestroy(&sgs_dd_setup_data->grid_aniso_ceed));
29   PetscCallCeed(ceed, CeedQFunctionContextDestroy(&sgs_dd_setup_data->sgsdd_qfctx));
30   PetscCallCeed(ceed, CeedQFunctionContextDestroy(&sgs_dd_setup_data->ifunction_qfctx));
31   PetscCall(PetscFree(sgs_dd_setup_data));
32   PetscCheck(CeedDestroy(&ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Destroying Ceed object failed");
33   PetscFunctionReturn(PETSC_SUCCESS);
34 }
35 
36 // @brief Create DM for storing subgrid stress at nodes
37 static PetscErrorCode SgsDDCreateDM(DM dm_source, DM *dm_sgs, PetscInt degree, PetscInt q_extra, PetscInt *num_components) {
38   PetscSection section;
39 
40   PetscFunctionBeginUser;
41   *num_components = 6;
42 
43   PetscCall(DMClone(dm_source, dm_sgs));
44   PetscCall(DMSetMatrixPreallocateSkip(*dm_sgs, PETSC_TRUE));
45   PetscCall(PetscObjectSetName((PetscObject)*dm_sgs, "Subgrid Stress Projection"));
46 
47   PetscCall(DMSetupByOrder_FEM(PETSC_TRUE, PETSC_TRUE, degree, 1, q_extra, 1, num_components, *dm_sgs));
48 
49   PetscCall(DMGetLocalSection(*dm_sgs, &section));
50   PetscCall(PetscSectionSetFieldName(section, 0, ""));
51   PetscCall(PetscSectionSetComponentName(section, 0, 0, "KMSubgridStressXX"));
52   PetscCall(PetscSectionSetComponentName(section, 0, 1, "KMSubgridStressYY"));
53   PetscCall(PetscSectionSetComponentName(section, 0, 2, "KMSubgridStressZZ"));
54   PetscCall(PetscSectionSetComponentName(section, 0, 3, "KMSubgridStressYZ"));
55   PetscCall(PetscSectionSetComponentName(section, 0, 4, "KMSubgridStressXZ"));
56   PetscCall(PetscSectionSetComponentName(section, 0, 5, "KMSubgridStressXY"));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 };
59 
60 // @brief Evaluate data-driven SGS using fused method
61 static PetscErrorCode SgsDDNodalStressEval_Fused(Honee honee, Vec Q_loc, Vec VelocityGradient, Vec SGSNodal_loc) {
62   SgsDDData    sgs_dd_data = honee->sgs_dd_data;
63   PetscMemType q_mem_type;
64 
65   PetscFunctionBeginUser;
66   PetscCall(VecPetscToCeed(Q_loc, &q_mem_type, honee->q_ceed));  // q_ceed is an implicit input
67 
68   PetscCall(ApplyCeedOperatorGlobalToLocal(VelocityGradient, SGSNodal_loc, sgs_dd_data->op_nodal_evaluation_ctx));
69 
70   PetscCall(VecCeedToPetsc(honee->q_ceed, q_mem_type, Q_loc));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
74 // @brief Create CeedOperator to calculate data-drive SGS at nodes using fused operator
75 static PetscErrorCode SgsDDSetupNodalEvaluation_Fused(Ceed ceed, Honee honee, SgsDDSetupData sgs_dd_setup_data) {
76   SgsDDData           sgs_dd_data = honee->sgs_dd_data;
77   CeedQFunction       qf_sgs_dd_nodal;
78   CeedOperator        op_sgs_dd_nodal;
79   CeedInt             num_comp_q, num_comp_grad_velo, num_comp_x, num_comp_grid_aniso;
80   PetscInt            dim;
81   CeedVector          inv_multiplicity;
82   CeedElemRestriction elem_restr_inv_multiplicity, elem_restr_grad_velo, elem_restr_sgs;
83   DMLabel             domain_label = NULL;
84   PetscInt            label_value = 0, height = 0, dm_field = 0;
85   NodalProjectionData grad_velo_proj;
86 
87   PetscFunctionBeginUser;
88   PetscCall(DMGetDimension(honee->dm, &dim));
89   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(honee->elem_restr_x, &num_comp_x));
90   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(honee->elem_restr_q, &num_comp_q));
91   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(sgs_dd_setup_data->elem_restr_grid_aniso, &num_comp_grid_aniso));
92   PetscCall(PetscObjectContainerQuery((PetscObject)honee, GRAD_VELO_PROJ_KEY, &grad_velo_proj));
93 
94   {  // Get velocity gradient information
95     CeedOperatorField op_field;
96     PetscCallCeed(ceed, CeedOperatorGetFieldByName(grad_velo_proj->l2_rhs_ctx->op, "velocity gradient", &op_field));
97     PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(op_field, &elem_restr_grad_velo));
98     PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(elem_restr_grad_velo, &num_comp_grad_velo));
99   }
100   PetscCall(DMPlexCeedElemRestrictionCreate(ceed, sgs_dd_data->dm_sgs, domain_label, label_value, height, dm_field, &elem_restr_sgs));
101   PetscCallCeed(ceed, CeedElemRestrictionCreateVector(elem_restr_sgs, &sgs_dd_data->sgs_nodal_ceed, NULL));
102 
103   PetscCall(GetInverseMultiplicity(ceed, sgs_dd_data->dm_sgs, domain_label, label_value, height, dm_field, PETSC_FALSE, &elem_restr_inv_multiplicity,
104                                    &inv_multiplicity));
105 
106   // -- Create operator for SGS DD model nodal evaluation
107   switch (honee->phys->state_var) {
108     case STATEVAR_PRIMITIVE:
109       PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Prim, ComputeSgsDDNodal_Prim_loc, &qf_sgs_dd_nodal));
110       break;
111     case STATEVAR_CONSERVATIVE:
112       PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Conserv, ComputeSgsDDNodal_Conserv_loc, &qf_sgs_dd_nodal));
113       break;
114     case STATEVAR_ENTROPY:
115       PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Entropy, ComputeSgsDDNodal_Entropy_loc, &qf_sgs_dd_nodal));
116       break;
117   }
118 
119   // Mesh/geometry order and solution basis order may differ, therefore must interpolate
120   CeedBasis basis_x_to_q;
121   PetscCallCeed(ceed, CeedBasisCreateProjection(honee->basis_x, honee->basis_q, &basis_x_to_q));
122 
123   PetscCallCeed(ceed, CeedQFunctionSetContext(qf_sgs_dd_nodal, sgs_dd_setup_data->sgsdd_qfctx));
124   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_nodal, "q", num_comp_q, CEED_EVAL_NONE));
125   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_nodal, "x", num_comp_x, CEED_EVAL_INTERP));
126   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_nodal, "gradient velocity", num_comp_grad_velo, CEED_EVAL_NONE));
127   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_nodal, "anisotropy tensor", num_comp_grid_aniso, CEED_EVAL_NONE));
128   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_nodal, "inverse multiplicity", 1, CEED_EVAL_NONE));
129   PetscCallCeed(ceed, CeedQFunctionAddOutput(qf_sgs_dd_nodal, "km_sgs", sgs_dd_data->num_comp_sgs, CEED_EVAL_NONE));
130 
131   PetscCallCeed(ceed, CeedOperatorCreate(ceed, qf_sgs_dd_nodal, NULL, NULL, &op_sgs_dd_nodal));
132   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_nodal, "q", honee->elem_restr_q, CEED_BASIS_NONE, honee->q_ceed));
133   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_nodal, "x", honee->elem_restr_x, basis_x_to_q, honee->x_coord));
134   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_nodal, "gradient velocity", elem_restr_grad_velo, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
135   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_nodal, "anisotropy tensor", sgs_dd_setup_data->elem_restr_grid_aniso, CEED_BASIS_NONE,
136                                            sgs_dd_setup_data->grid_aniso_ceed));
137   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_nodal, "inverse multiplicity", elem_restr_inv_multiplicity, CEED_BASIS_NONE, inv_multiplicity));
138   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_nodal, "km_sgs", elem_restr_sgs, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
139 
140   PetscCall(OperatorApplyContextCreate(grad_velo_proj->dm, sgs_dd_data->dm_sgs, ceed, op_sgs_dd_nodal, NULL, sgs_dd_data->sgs_nodal_ceed, NULL, NULL,
141                                        &sgs_dd_data->op_nodal_evaluation_ctx));
142 
143   sgs_dd_setup_data->elem_restr_sgs = elem_restr_sgs;
144   sgs_dd_data->sgs_nodal_eval       = SgsDDNodalStressEval_Fused;
145 
146   PetscCallCeed(ceed, CeedVectorDestroy(&inv_multiplicity));
147   PetscCallCeed(ceed, CeedBasisDestroy(&basis_x_to_q));
148   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_inv_multiplicity));
149   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_grad_velo));
150   PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_sgs_dd_nodal));
151   PetscCallCeed(ceed, CeedOperatorDestroy(&op_sgs_dd_nodal));
152   PetscFunctionReturn(PETSC_SUCCESS);
153 }
154 
155 // @brief Setup data-driven model inference using libCEED native implementation
156 static PetscErrorCode SgsDDSetupNodalEvaluation_Sequential_Ceed(Ceed ceed, SgsDDData sgs_dd_data, SgsDDSetupData sgs_dd_setup_data,
157                                                                 CeedElemRestriction elem_restr_dd_inputs, CeedElemRestriction elem_restr_dd_outputs,
158                                                                 CeedElemRestriction elem_restr_inv_multiplicity, CeedVector inv_multiplicity,
159                                                                 void **ctx) {
160   CeedQFunction         qf_sgs_dd_inference;
161   CeedOperator          op_sgs_dd_inference;
162   OperatorApplyContext *op_context = (OperatorApplyContext *)ctx;
163 
164   PetscFunctionBeginUser;
165   PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Sequential_Inference, ComputeSgsDDNodal_Sequential_Inference_loc,
166                                                   &qf_sgs_dd_inference));
167 
168   PetscCallCeed(ceed, CeedQFunctionSetContext(qf_sgs_dd_inference, sgs_dd_setup_data->sgsdd_qfctx));
169   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_inference, "model inputs", sgs_dd_data->num_comp_inputs, CEED_EVAL_NONE));
170   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_inference, "inverse multiplicity", 1, CEED_EVAL_NONE));
171   PetscCallCeed(ceed, CeedQFunctionAddOutput(qf_sgs_dd_inference, "model outputs", sgs_dd_data->num_comp_outputs, CEED_EVAL_NONE));
172 
173   PetscCallCeed(ceed, CeedOperatorCreate(ceed, qf_sgs_dd_inference, NULL, NULL, &op_sgs_dd_inference));
174   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_inference, "model inputs", elem_restr_dd_inputs, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
175   PetscCallCeed(ceed,
176                 CeedOperatorSetField(op_sgs_dd_inference, "inverse multiplicity", elem_restr_inv_multiplicity, CEED_BASIS_NONE, inv_multiplicity));
177   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_inference, "model outputs", elem_restr_dd_outputs, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
178 
179   PetscCall(OperatorApplyContextCreate(sgs_dd_data->dm_dd_inputs, sgs_dd_data->dm_dd_outputs, ceed, op_sgs_dd_inference, NULL, NULL, NULL, NULL,
180                                        op_context));
181   sgs_dd_data->sgs_nodal_inference_ctx_destroy = (PetscErrorCode(*)(void *))OperatorApplyContextDestroy;
182 
183   PetscCallCeed(ceed, CeedOperatorDestroy(&op_sgs_dd_inference));
184   PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_sgs_dd_inference));
185   PetscFunctionReturn(PETSC_SUCCESS);
186 }
187 
188 // @brief Perform data-driven model inference using libCEED native implementation
189 PetscErrorCode SgsDDNodalStressEval_Sequential_Ceed(Vec DD_Inputs_loc, Vec DD_Outputs_loc, void *ctx) {
190   OperatorApplyContext op_context = *(OperatorApplyContext *)ctx;
191 
192   PetscFunctionBeginUser;
193   PetscCall(PetscLogEventBegin(HONEE_SgsModelDDData, DD_Inputs_loc, DD_Outputs_loc, NULL, NULL));
194   PetscCall(PetscLogEventBegin(HONEE_SgsModelDDInference, DD_Inputs_loc, DD_Outputs_loc, NULL, NULL));
195   PetscCall(PetscLogGpuTimeBegin());
196   PetscCall(ApplyCeedOperatorLocalToLocal(DD_Inputs_loc, DD_Outputs_loc, op_context));
197   PetscCall(PetscLogGpuTimeEnd());
198   PetscCall(PetscLogEventEnd(HONEE_SgsModelDDInference, DD_Inputs_loc, DD_Outputs_loc, NULL, NULL));
199   PetscCall(PetscLogEventEnd(HONEE_SgsModelDDData, DD_Inputs_loc, DD_Outputs_loc, NULL, NULL));
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 // @brief Setup data-driven model inference using libtorch
204 static PetscErrorCode SgsDDSetupNodalEvaluation_Sequential_Torch(Ceed ceed, SgsDDData sgs_dd_data, SgsDDSetupData sgs_dd_setup_data,
205                                                                  CeedElemRestriction elem_restr_dd_inputs, CeedElemRestriction elem_restr_dd_outputs,
206                                                                  CeedElemRestriction elem_restr_inv_multiplicity, CeedVector inv_multiplicity,
207                                                                  void **ctx) {
208   const char     *ceed_resource;
209   char            model_path[PETSC_MAX_PATH_LEN] = "";
210   TorchDeviceType model_device_type;
211 
212   PetscFunctionBeginUser;
213   PetscCallCeed(ceed, CeedGetResource(ceed, &ceed_resource));
214   if (strstr(ceed_resource, "/gpu/cuda")) model_device_type = TORCH_DEVICE_CUDA;
215   else if (strstr(ceed_resource, "/gpu/hip")) model_device_type = TORCH_DEVICE_HIP;
216   // On-device XPU is not working reliably currently, default to CPU inference evaluation
217   // else if (strstr(ceed_resource, "/gpu/sycl")) model_device_type = TORCH_DEVICE_XPU;
218   else model_device_type = TORCH_DEVICE_CPU;
219   PetscCall(PetscOptionsGetEnum(NULL, NULL, "-sgs_model_dd_torch_model_device", TorchDeviceTypes, (PetscEnum *)&model_device_type, NULL));
220   PetscCall(PetscOptionsGetString(NULL, NULL, "-sgs_model_dd_torch_model_path", model_path, sizeof(model_path), NULL));
221 
222   PetscCall(LoadModel_Torch(model_path, model_device_type));
223 
224   PetscFunctionReturn(PETSC_SUCCESS);
225 }
226 
227 // @brief Perform data-driven model inference using libtorch
228 static PetscErrorCode SgsDDNodalStressEval_Sequential_Torch(Vec DD_Inputs_loc, Vec DD_Outputs_loc, void *ctx) {
229   static PetscBool run_through = PETSC_FALSE;
230   PetscFunctionBeginUser;
231   if (!run_through) {
232     PetscCall(VecViewFromOptions(DD_Inputs_loc, NULL, "-dd_inputs_loc_view"));
233   }
234   PetscCall(ModelInference_Torch(DD_Inputs_loc, DD_Outputs_loc));
235   if (!run_through) {
236     PetscCall(VecViewFromOptions(DD_Outputs_loc, NULL, "-dd_outputs_loc_view"));
237     run_through = PETSC_TRUE;
238   }
239   PetscFunctionReturn(PETSC_SUCCESS);
240 }
241 
242 // @brief Evaluate data-driven SGS using sequential method
243 PetscErrorCode SgsDDNodalStressEval_Sequential(Honee honee, Vec Q_loc, Vec VelocityGradient, Vec SGSNodal_loc) {
244   SgsDDData    sgs_dd_data = honee->sgs_dd_data;
245   PetscMemType q_mem_type;
246   Vec          DD_Inputs_loc, DD_Outputs_loc;
247 
248   PetscFunctionBeginUser;
249   PetscCall(DMGetLocalVector(sgs_dd_data->dm_dd_inputs, &DD_Inputs_loc));
250   PetscCall(DMGetLocalVector(sgs_dd_data->dm_dd_outputs, &DD_Outputs_loc));
251   PetscCall(VecPetscToCeed(Q_loc, &q_mem_type, honee->q_ceed));  // q_ceed is an implicit input
252 
253   PetscCall(ApplyCeedOperatorGlobalToLocal(VelocityGradient, DD_Inputs_loc, sgs_dd_data->op_nodal_dd_inputs_ctx));
254   PetscCall(sgs_dd_data->sgs_nodal_inference(DD_Inputs_loc, DD_Outputs_loc, &sgs_dd_data->sgs_nodal_inference_ctx));
255   PetscCall(ApplyCeedOperatorLocalToLocal(DD_Outputs_loc, SGSNodal_loc, sgs_dd_data->op_nodal_dd_outputs_ctx));
256 
257   PetscCall(VecCeedToPetsc(honee->q_ceed, q_mem_type, Q_loc));
258   PetscCall(DMRestoreLocalVector(sgs_dd_data->dm_dd_inputs, &DD_Inputs_loc));
259   PetscCall(DMRestoreLocalVector(sgs_dd_data->dm_dd_outputs, &DD_Outputs_loc));
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 // @brief Create CeedOperator to calculate data-drive SGS at nodes using sequentially-applied operators
264 static PetscErrorCode SgsDDSetupNodalEvaluation_Sequential(Ceed ceed, Honee honee, SgsDDSetupData sgs_dd_setup_data) {
265   SgsDDData           sgs_dd_data = honee->sgs_dd_data;
266   CeedInt             num_comp_q, num_comp_grad_velo, num_comp_x, num_comp_grid_aniso, num_comp_eigvec = 9 + 1;
267   PetscInt            dim;
268   CeedVector          inv_multiplicity, eigvec;
269   NodalProjectionData grad_velo_proj;
270   CeedElemRestriction elem_restr_inv_multiplicity, elem_restr_grad_velo, elem_restr_sgs, elem_restr_eigvec, elem_restr_dd_inputs,
271       elem_restr_dd_outputs;
272   DMLabel  domain_label = NULL;
273   PetscInt label_value = 0, height = 0, dm_field = 0;
274 
275   PetscFunctionBeginUser;
276   {  // Create DMs for data-driven input and output values
277     PetscSection section;
278     PetscInt     degree, q_extra;
279     {  // Get degree and number of quadrature points from dm_sgs
280       PetscFE         fe;
281       PetscSpace      basis;
282       PetscQuadrature quadrature;
283       PetscInt        num_qpnts;
284       PetscCall(DMGetField(sgs_dd_data->dm_sgs, 0, NULL, (PetscObject *)&fe));
285       PetscCall(PetscFEGetBasisSpace(fe, &basis));
286       PetscCall(PetscSpaceGetDegree(basis, &degree, NULL));
287       PetscCall(PetscFEGetQuadrature(fe, &quadrature));
288       PetscCall(PetscQuadratureGetOrder(quadrature, &num_qpnts));
289       q_extra = degree - num_qpnts;
290     }
291 
292     PetscCall(DMClone(sgs_dd_data->dm_sgs, &sgs_dd_data->dm_dd_inputs));
293     PetscCall(DMSetMatrixPreallocateSkip(sgs_dd_data->dm_dd_inputs, PETSC_TRUE));
294     PetscCall(PetscObjectSetName((PetscObject)sgs_dd_data->dm_dd_inputs, "Data-Driven Model Inputs"));
295     PetscCall(DMSetupByOrder_FEM(PETSC_TRUE, PETSC_TRUE, degree, 1, q_extra, 1, &sgs_dd_data->num_comp_inputs, sgs_dd_data->dm_dd_inputs));
296     PetscCall(DMGetLocalSection(sgs_dd_data->dm_dd_inputs, &section));
297     PetscCall(PetscSectionSetFieldName(section, 0, ""));
298     for (CeedInt i = 0; i < sgs_dd_data->num_comp_inputs; i++) {
299       char component_name[PETSC_MAX_PATH_LEN];
300 
301       PetscCall(PetscSNPrintf(component_name, sizeof component_name, "DataDrivenInput%" CeedInt_FMT, i + 1));
302       PetscCall(PetscSectionSetComponentName(section, 0, i, component_name));
303     }
304 
305     PetscCall(DMClone(sgs_dd_data->dm_sgs, &sgs_dd_data->dm_dd_outputs));
306     PetscCall(DMSetMatrixPreallocateSkip(sgs_dd_data->dm_dd_outputs, PETSC_TRUE));
307     PetscCall(PetscObjectSetName((PetscObject)sgs_dd_data->dm_dd_outputs, "Data-Driven Model Outputs"));
308     PetscCall(DMSetupByOrder_FEM(PETSC_TRUE, PETSC_TRUE, degree, 1, q_extra, 1, &sgs_dd_data->num_comp_outputs, sgs_dd_data->dm_dd_outputs));
309     PetscCall(DMGetLocalSection(sgs_dd_data->dm_dd_outputs, &section));
310     PetscCall(PetscSectionSetFieldName(section, 0, ""));
311     for (CeedInt i = 0; i < sgs_dd_data->num_comp_outputs; i++) {
312       char component_name[PETSC_MAX_PATH_LEN];
313 
314       PetscCall(PetscSNPrintf(component_name, sizeof component_name, "DataDrivenOutput%" CeedInt_FMT, i + 1));
315       PetscCall(PetscSectionSetComponentName(section, 0, i, component_name));
316     }
317   }
318 
319   PetscCall(DMGetDimension(honee->dm, &dim));
320   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(honee->elem_restr_x, &num_comp_x));
321   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(honee->elem_restr_q, &num_comp_q));
322   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(sgs_dd_setup_data->elem_restr_grid_aniso, &num_comp_grid_aniso));
323   PetscCall(PetscObjectContainerQuery((PetscObject)honee, GRAD_VELO_PROJ_KEY, &grad_velo_proj));
324 
325   {  // Get velocity gradient information
326     CeedOperatorField op_field;
327     PetscCallCeed(ceed, CeedOperatorGetFieldByName(grad_velo_proj->l2_rhs_ctx->op, "velocity gradient", &op_field));
328     PetscCallCeed(ceed, CeedOperatorFieldGetElemRestriction(op_field, &elem_restr_grad_velo));
329     PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(elem_restr_grad_velo, &num_comp_grad_velo));
330     PetscCallCeed(ceed, CeedElemRestrictionCreateVector(elem_restr_grad_velo, &sgs_dd_data->grad_velo_ceed, NULL));
331   }
332 
333   PetscCall(DMPlexCeedElemRestrictionCreate(ceed, sgs_dd_data->dm_sgs, domain_label, label_value, height, dm_field, &elem_restr_sgs));
334   PetscCallCeed(ceed, CeedElemRestrictionCreateVector(elem_restr_sgs, &sgs_dd_data->sgs_nodal_ceed, NULL));
335   PetscCall(
336       DMPlexCeedElemRestrictionCollocatedCreate(ceed, sgs_dd_data->dm_sgs, domain_label, label_value, height, num_comp_eigvec, &elem_restr_eigvec));
337   PetscCallCeed(ceed, CeedElemRestrictionCreateVector(elem_restr_eigvec, &eigvec, NULL));
338 
339   PetscCall(DMPlexCeedElemRestrictionCreate(ceed, sgs_dd_data->dm_dd_inputs, domain_label, label_value, height, dm_field, &elem_restr_dd_inputs));
340   PetscCall(DMPlexCeedElemRestrictionCreate(ceed, sgs_dd_data->dm_dd_outputs, domain_label, label_value, height, dm_field, &elem_restr_dd_outputs));
341 
342   PetscCall(GetInverseMultiplicity(ceed, sgs_dd_data->dm_sgs, domain_label, label_value, height, dm_field, PETSC_FALSE, &elem_restr_inv_multiplicity,
343                                    &inv_multiplicity));
344 
345   {  // Create operator for data-driven input evaluation
346     CeedQFunction qf_sgs_dd_inputs;
347     CeedOperator  op_sgs_dd_inputs;
348 
349     switch (honee->phys->state_var) {
350       case STATEVAR_PRIMITIVE:
351         PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Sequential_Inputs_Prim,
352                                                         ComputeSgsDDNodal_Sequential_Inputs_Prim_loc, &qf_sgs_dd_inputs));
353         break;
354       case STATEVAR_CONSERVATIVE:
355         PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Sequential_Inputs_Conserv,
356                                                         ComputeSgsDDNodal_Sequential_Inputs_Conserv_loc, &qf_sgs_dd_inputs));
357         break;
358       case STATEVAR_ENTROPY:
359         PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Sequential_Inputs_Entropy,
360                                                         ComputeSgsDDNodal_Sequential_Inputs_Entropy_loc, &qf_sgs_dd_inputs));
361         break;
362     }
363 
364     PetscCallCeed(ceed, CeedQFunctionSetContext(qf_sgs_dd_inputs, sgs_dd_setup_data->sgsdd_qfctx));
365     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_inputs, "q", num_comp_q, CEED_EVAL_NONE));
366     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_inputs, "gradient velocity", num_comp_grad_velo, CEED_EVAL_NONE));
367     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_inputs, "anisotropy tensor", num_comp_grid_aniso, CEED_EVAL_NONE));
368     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_inputs, "inverse multiplicity", 1, CEED_EVAL_NONE));
369     PetscCallCeed(ceed, CeedQFunctionAddOutput(qf_sgs_dd_inputs, "eigenvectors", num_comp_eigvec, CEED_EVAL_NONE));
370     PetscCallCeed(ceed, CeedQFunctionAddOutput(qf_sgs_dd_inputs, "model inputs", sgs_dd_data->num_comp_inputs, CEED_EVAL_NONE));
371 
372     PetscCallCeed(ceed, CeedOperatorCreate(ceed, qf_sgs_dd_inputs, NULL, NULL, &op_sgs_dd_inputs));
373     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_inputs, "q", honee->elem_restr_q, CEED_BASIS_NONE, honee->q_ceed));
374     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_inputs, "gradient velocity", elem_restr_grad_velo, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
375     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_inputs, "anisotropy tensor", sgs_dd_setup_data->elem_restr_grid_aniso, CEED_BASIS_NONE,
376                                              sgs_dd_setup_data->grid_aniso_ceed));
377     PetscCallCeed(ceed,
378                   CeedOperatorSetField(op_sgs_dd_inputs, "inverse multiplicity", elem_restr_inv_multiplicity, CEED_BASIS_NONE, inv_multiplicity));
379     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_inputs, "eigenvectors", elem_restr_eigvec, CEED_BASIS_NONE, eigvec));
380     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_inputs, "model inputs", elem_restr_dd_inputs, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
381 
382     PetscCall(OperatorApplyContextCreate(grad_velo_proj->dm, sgs_dd_data->dm_dd_inputs, ceed, op_sgs_dd_inputs, NULL, NULL, NULL, NULL,
383                                          &sgs_dd_data->op_nodal_dd_inputs_ctx));
384     PetscCallCeed(ceed, CeedOperatorDestroy(&op_sgs_dd_inputs));
385     PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_sgs_dd_inputs));
386   }
387 
388   {  // Create operator for data-driven output handling
389     CeedQFunction qf_sgs_dd_outputs;
390     CeedOperator  op_sgs_dd_outputs;
391 
392     PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, ComputeSgsDDNodal_Sequential_Outputs, ComputeSgsDDNodal_Sequential_Outputs_loc,
393                                                     &qf_sgs_dd_outputs));
394     PetscCallCeed(ceed, CeedQFunctionSetContext(qf_sgs_dd_outputs, sgs_dd_setup_data->sgsdd_qfctx));
395     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_outputs, "model outputs", sgs_dd_data->num_comp_outputs, CEED_EVAL_NONE));
396     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_outputs, "anisotropy tensor", num_comp_grid_aniso, CEED_EVAL_NONE));
397     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_outputs, "inverse multiplicity", 1, CEED_EVAL_NONE));
398     PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_dd_outputs, "eigenvectors", num_comp_eigvec, CEED_EVAL_NONE));
399     PetscCallCeed(ceed, CeedQFunctionAddOutput(qf_sgs_dd_outputs, "km_sgs", sgs_dd_data->num_comp_sgs, CEED_EVAL_NONE));
400 
401     PetscCallCeed(ceed, CeedOperatorCreate(ceed, qf_sgs_dd_outputs, NULL, NULL, &op_sgs_dd_outputs));
402     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_outputs, "model outputs", elem_restr_dd_outputs, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
403     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_outputs, "anisotropy tensor", sgs_dd_setup_data->elem_restr_grid_aniso, CEED_BASIS_NONE,
404                                              sgs_dd_setup_data->grid_aniso_ceed));
405     PetscCallCeed(ceed,
406                   CeedOperatorSetField(op_sgs_dd_outputs, "inverse multiplicity", elem_restr_inv_multiplicity, CEED_BASIS_NONE, inv_multiplicity));
407     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_outputs, "eigenvectors", elem_restr_eigvec, CEED_BASIS_NONE, eigvec));
408     PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_dd_outputs, "km_sgs", elem_restr_sgs, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
409 
410     PetscCall(OperatorApplyContextCreate(sgs_dd_data->dm_dd_outputs, sgs_dd_data->dm_sgs, ceed, op_sgs_dd_outputs, NULL, sgs_dd_data->sgs_nodal_ceed,
411                                          NULL, NULL, &sgs_dd_data->op_nodal_dd_outputs_ctx));
412     PetscCallCeed(ceed, CeedOperatorDestroy(&op_sgs_dd_outputs));
413     PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_sgs_dd_outputs));
414   }
415 
416   sgs_dd_data->sgs_nodal_eval = SgsDDNodalStressEval_Sequential;
417 
418   if (sgs_dd_setup_data->sgs_dd_model_implementation == SGS_MODEL_DD_SEQENTIAL_CEED) {
419     sgs_dd_data->sgs_nodal_inference = SgsDDNodalStressEval_Sequential_Ceed;
420     PetscCall(SgsDDSetupNodalEvaluation_Sequential_Ceed(ceed, sgs_dd_data, sgs_dd_setup_data, elem_restr_dd_inputs, elem_restr_dd_outputs,
421                                                         elem_restr_inv_multiplicity, inv_multiplicity, &sgs_dd_data->sgs_nodal_inference_ctx));
422   } else if (sgs_dd_setup_data->sgs_dd_model_implementation == SGS_MODEL_DD_SEQENTIAL_TORCH) {
423     sgs_dd_data->sgs_nodal_inference = SgsDDNodalStressEval_Sequential_Torch;
424     PetscCall(SgsDDSetupNodalEvaluation_Sequential_Torch(ceed, sgs_dd_data, sgs_dd_setup_data, elem_restr_dd_inputs, elem_restr_dd_outputs,
425                                                          elem_restr_inv_multiplicity, inv_multiplicity, &sgs_dd_data->sgs_nodal_inference_ctx));
426   }
427 
428   sgs_dd_setup_data->elem_restr_sgs = elem_restr_sgs;
429 
430   PetscCallCeed(ceed, CeedVectorDestroy(&inv_multiplicity));
431   PetscCallCeed(ceed, CeedVectorDestroy(&eigvec));
432   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_inv_multiplicity));
433   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_eigvec));
434   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_dd_inputs));
435   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_dd_outputs));
436   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_grad_velo));
437   PetscFunctionReturn(PETSC_SUCCESS);
438 }
439 
440 // @brief Create CeedOperator to compute SGS contribution to the residual
441 static PetscErrorCode SgsSetupNodalIFunction(Ceed ceed, Honee honee, SgsDDSetupData sgs_dd_setup_data) {
442   SgsDDData           sgs_dd_data = honee->sgs_dd_data;
443   CeedInt             num_comp_q, q_data_size, num_comp_x;
444   PetscInt            dim;
445   CeedQFunction       qf_sgs_apply;
446   CeedOperator        op_sgs_apply;
447   CeedBasis           basis_sgs;
448   CeedVector          q_data;
449   CeedElemRestriction elem_restr_qd;
450 
451   PetscFunctionBeginUser;
452   PetscCall(DMGetDimension(honee->dm, &dim));
453   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(honee->elem_restr_q, &num_comp_q));
454   PetscCallCeed(ceed, CeedElemRestrictionGetNumComponents(honee->elem_restr_x, &num_comp_x));
455 
456   {
457     DMLabel  domain_label = NULL;
458     PetscInt label_value = 0, height = 0, dm_field = 0;
459 
460     PetscCall(CreateBasisFromPlex(ceed, sgs_dd_data->dm_sgs, domain_label, label_value, height, dm_field, &basis_sgs));
461     PetscCall(QDataGet(ceed, sgs_dd_data->dm_sgs, domain_label, label_value, honee->elem_restr_x, honee->basis_x, honee->x_coord, &elem_restr_qd,
462                        &q_data, &q_data_size));
463   }
464 
465   switch (honee->phys->state_var) {
466     case STATEVAR_PRIMITIVE:
467       PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, IFunction_NodalSgs_Prim, IFunction_NodalSgs_Prim_loc, &qf_sgs_apply));
468       break;
469     case STATEVAR_CONSERVATIVE:
470       PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, IFunction_NodalSgs_Conserv, IFunction_NodalSgs_Conserv_loc, &qf_sgs_apply));
471       break;
472     case STATEVAR_ENTROPY:
473       PetscCallCeed(ceed, CeedQFunctionCreateInterior(ceed, 1, IFunction_NodalSgs_Entropy, IFunction_NodalSgs_Entropy_loc, &qf_sgs_apply));
474       break;
475   }
476 
477   PetscCallCeed(ceed, CeedQFunctionSetContext(qf_sgs_apply, sgs_dd_setup_data->ifunction_qfctx));
478   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_apply, "q", num_comp_q, CEED_EVAL_INTERP));
479   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_apply, "qdata", q_data_size, CEED_EVAL_NONE));
480   PetscCallCeed(ceed, CeedQFunctionAddInput(qf_sgs_apply, "km_sgs", sgs_dd_data->num_comp_sgs, CEED_EVAL_INTERP));
481   PetscCallCeed(ceed, CeedQFunctionAddOutput(qf_sgs_apply, "Grad_v", num_comp_q * dim, CEED_EVAL_GRAD));
482 
483   PetscCallCeed(ceed, CeedOperatorCreate(ceed, qf_sgs_apply, NULL, NULL, &op_sgs_apply));
484   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_apply, "q", honee->elem_restr_q, honee->basis_q, CEED_VECTOR_ACTIVE));
485   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_apply, "qdata", elem_restr_qd, CEED_BASIS_NONE, q_data));
486   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_apply, "km_sgs", sgs_dd_setup_data->elem_restr_sgs, basis_sgs, sgs_dd_data->sgs_nodal_ceed));
487   PetscCallCeed(ceed, CeedOperatorSetField(op_sgs_apply, "Grad_v", honee->elem_restr_q, honee->basis_q, CEED_VECTOR_ACTIVE));
488 
489   PetscCall(
490       OperatorApplyContextCreate(honee->dm, honee->dm, ceed, op_sgs_apply, honee->q_ceed, honee->g_ceed, NULL, NULL, &sgs_dd_data->op_sgs_apply_ctx));
491 
492   PetscCallCeed(ceed, CeedVectorDestroy(&q_data));
493   PetscCallCeed(ceed, CeedElemRestrictionDestroy(&elem_restr_qd));
494   PetscCallCeed(ceed, CeedBasisDestroy(&basis_sgs));
495   PetscCallCeed(ceed, CeedOperatorDestroy(&op_sgs_apply));
496   PetscCallCeed(ceed, CeedQFunctionDestroy(&qf_sgs_apply));
497   PetscFunctionReturn(PETSC_SUCCESS);
498 }
499 
500 // @brief Calculate and add data-driven SGS residual to the global residual
501 PetscErrorCode SgsDDApplyIFunction(Honee honee, const Vec Q_loc, Vec G_loc) {
502   SgsDDData           sgs_dd_data = honee->sgs_dd_data;
503   Vec                 VelocityGradient, SGSNodal_loc;
504   PetscMemType        sgs_nodal_mem_type;
505   NodalProjectionData grad_velo_proj;
506 
507   PetscFunctionBeginUser;
508   PetscCall(PetscLogEventBegin(HONEE_SgsModel, Q_loc, G_loc, NULL, NULL));
509   PetscCall(PetscObjectContainerQuery((PetscObject)honee, GRAD_VELO_PROJ_KEY, &grad_velo_proj));
510   PetscCall(DMGetGlobalVector(grad_velo_proj->dm, &VelocityGradient));
511   PetscCall(VelocityGradientProjectionApply(grad_velo_proj, Q_loc, VelocityGradient));
512 
513   // -- Compute Nodal SGS tensor
514   PetscCall(DMGetLocalVector(sgs_dd_data->dm_sgs, &SGSNodal_loc));
515   PetscCall(sgs_dd_data->sgs_nodal_eval(honee, Q_loc, VelocityGradient, SGSNodal_loc));
516 
517   // -- Compute contribution of the SGS stress
518   PetscCall(VecPetscToCeed(SGSNodal_loc, &sgs_nodal_mem_type, sgs_dd_data->sgs_nodal_ceed));  // sgs_nodal_ceed is an implicit input
519   PetscCall(ApplyAddCeedOperatorLocalToLocal(Q_loc, G_loc, sgs_dd_data->op_sgs_apply_ctx));
520 
521   // -- Return local SGS vector
522   PetscCall(VecCeedToPetsc(sgs_dd_data->sgs_nodal_ceed, sgs_nodal_mem_type, SGSNodal_loc));
523   PetscCall(DMRestoreLocalVector(sgs_dd_data->dm_sgs, &SGSNodal_loc));
524   PetscCall(DMRestoreGlobalVector(grad_velo_proj->dm, &VelocityGradient));
525   PetscCall(PetscLogEventEnd(HONEE_SgsModel, Q_loc, G_loc, NULL, NULL));
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 // @brief B = A^T, A is NxM, B is MxN
530 static PetscErrorCode TransposeMatrix(const PetscScalar *A, PetscScalar *B, const PetscInt N, const PetscInt M) {
531   PetscFunctionBeginUser;
532   for (PetscInt i = 0; i < N; i++) {
533     for (PetscInt j = 0; j < M; j++) {
534       B[j * N + i] = A[i * M + j];
535     }
536   }
537   PetscFunctionReturn(PETSC_SUCCESS);
538 }
539 
540 // @brief Read neural network coefficients from file and put into context struct
541 static PetscErrorCode SgsDDContextFill(MPI_Comm comm, char data_dir[PETSC_MAX_PATH_LEN], SgsDDContext *psgsdd_ctx) {
542   SgsDDContext sgsdd_ctx;
543   PetscInt     num_inputs = (*psgsdd_ctx)->num_inputs, num_outputs = (*psgsdd_ctx)->num_outputs, num_neurons = (*psgsdd_ctx)->num_neurons;
544   char         file_path[PETSC_MAX_PATH_LEN];
545   PetscScalar *temp;
546 
547   PetscFunctionBeginUser;
548   {
549     SgsDDContext sgsdd_temp;
550     PetscCall(PetscNew(&sgsdd_temp));
551     *sgsdd_temp                     = **psgsdd_ctx;
552     sgsdd_temp->offsets.bias1       = 0;
553     sgsdd_temp->offsets.bias2       = sgsdd_temp->offsets.bias1 + num_neurons;
554     sgsdd_temp->offsets.weight1     = sgsdd_temp->offsets.bias2 + num_neurons;
555     sgsdd_temp->offsets.weight2     = sgsdd_temp->offsets.weight1 + num_neurons * num_inputs;
556     sgsdd_temp->offsets.out_scaling = sgsdd_temp->offsets.weight2 + num_inputs * num_neurons;
557     PetscInt total_num_scalars      = sgsdd_temp->offsets.out_scaling + 2 * num_outputs;
558     sgsdd_temp->total_bytes         = sizeof(*sgsdd_ctx) + total_num_scalars * sizeof(sgsdd_ctx->data[0]);
559     PetscCall(PetscMalloc(sgsdd_temp->total_bytes, &sgsdd_ctx));
560     *sgsdd_ctx = *sgsdd_temp;
561     PetscCall(PetscFree(sgsdd_temp));
562   }
563 
564   PetscCall(PetscSNPrintf(file_path, sizeof file_path, "%s/%s", data_dir, "b1.dat"));
565   PetscCall(PhastaDatFileReadToArrayReal(comm, file_path, &sgsdd_ctx->data[sgsdd_ctx->offsets.bias1]));
566   PetscCall(PetscSNPrintf(file_path, sizeof file_path, "%s/%s", data_dir, "b2.dat"));
567   PetscCall(PhastaDatFileReadToArrayReal(comm, file_path, &sgsdd_ctx->data[sgsdd_ctx->offsets.bias2]));
568   PetscCall(PetscSNPrintf(file_path, sizeof file_path, "%s/%s", data_dir, "OutScaling.dat"));
569   PetscCall(PhastaDatFileReadToArrayReal(comm, file_path, &sgsdd_ctx->data[sgsdd_ctx->offsets.out_scaling]));
570 
571   {
572     PetscCall(PetscMalloc1(num_inputs * num_neurons, &temp));
573     PetscCall(PetscSNPrintf(file_path, sizeof file_path, "%s/%s", data_dir, "w1.dat"));
574     PetscCall(PhastaDatFileReadToArrayReal(comm, file_path, temp));
575     PetscCall(TransposeMatrix(temp, &sgsdd_ctx->data[sgsdd_ctx->offsets.weight1], num_inputs, num_neurons));
576     PetscCall(PetscFree(temp));
577   }
578   {
579     PetscCall(PetscMalloc1(num_outputs * num_neurons, &temp));
580     PetscCall(PetscSNPrintf(file_path, sizeof file_path, "%s/%s", data_dir, "w2.dat"));
581     PetscCall(PhastaDatFileReadToArrayReal(comm, file_path, temp));
582     PetscCall(TransposeMatrix(temp, &sgsdd_ctx->data[sgsdd_ctx->offsets.weight2], num_neurons, num_outputs));
583     PetscCall(PetscFree(temp));
584   }
585 
586   PetscCall(PetscFree(*psgsdd_ctx));
587   *psgsdd_ctx = sgsdd_ctx;
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590 
591 PetscErrorCode SgsDDSetup(Ceed ceed, Honee honee, ProblemData problem) {
592   PetscReal                alpha = 0;
593   SgsDDContext             sgsdd_ctx;
594   MPI_Comm                 comm                           = honee->comm;
595   char                     sgs_dd_dir[PETSC_MAX_PATH_LEN] = "./dd_sgs_parameters";
596   SgsDDSetupData           sgs_dd_setup_data;
597   NewtonianIdealGasContext gas;
598   NodalProjectionData      grad_velo_proj;
599 
600   PetscFunctionBeginUser;
601   PetscCall(VelocityGradientProjectionSetup(ceed, honee, problem, honee->phys->state_var, honee->elem_restr_q, honee->basis_q, &grad_velo_proj));
602   PetscCall(PetscObjectContainerCompose((PetscObject)honee, GRAD_VELO_PROJ_KEY, grad_velo_proj, (PetscCtxDestroyFn *)NodalProjectionDataDestroy));
603 
604   PetscCall(PetscNew(&honee->sgs_dd_data));
605   honee->sgs_dd_data->num_comp_inputs  = 6;
606   honee->sgs_dd_data->num_comp_outputs = 6;
607 
608   PetscCall(PetscNew(&sgs_dd_setup_data));
609 
610   PetscOptionsBegin(comm, NULL, "SGS Data-Driven Model Options", NULL);
611   PetscCall(PetscOptionsReal("-sgs_model_dd_leakyrelu_alpha", "Slope parameter for Leaky ReLU activation function", NULL, alpha, &alpha, NULL));
612   PetscCall(PetscOptionsString("-sgs_model_dd_parameter_dir", "Path to directory with model parameters (weights, biases, etc.)", NULL, sgs_dd_dir,
613                                sgs_dd_dir, sizeof(sgs_dd_dir), NULL));
614   PetscCall(PetscOptionsDeprecated("-sgs_model_dd_use_fused", NULL, "libCEED 0.12.0", "Use -sgs_model_dd_type instead"));
615   sgs_dd_setup_data->sgs_dd_model_implementation = SGS_MODEL_DD_FUSED;
616   PetscCall(PetscOptionsEnum("-sgs_model_dd_implementation", "Data-Driven SGS model implementation", NULL, SGSModelDDImplementations,
617                              (PetscEnum)sgs_dd_setup_data->sgs_dd_model_implementation, (PetscEnum *)&sgs_dd_setup_data->sgs_dd_model_implementation,
618                              NULL));
619   PetscOptionsEnd();
620 
621   PetscCall(PetscNew(&sgsdd_ctx));
622   sgsdd_ctx->num_layers  = 1;
623   sgsdd_ctx->num_inputs  = 6;
624   sgsdd_ctx->num_outputs = 6;
625   sgsdd_ctx->num_neurons = 20;
626   sgsdd_ctx->alpha       = alpha;
627 
628   PetscCall(SgsDDContextFill(comm, sgs_dd_dir, &sgsdd_ctx));
629 
630   // -- Create DM for storing SGS tensor at nodes
631   PetscCall(
632       SgsDDCreateDM(honee->dm, &honee->sgs_dd_data->dm_sgs, honee->app_ctx->degree, honee->app_ctx->q_extra, &honee->sgs_dd_data->num_comp_sgs));
633 
634   PetscCallCeed(ceed, CeedQFunctionContextGetDataRead(problem->apply_vol_ifunction.qfctx, CEED_MEM_HOST, &gas));
635   sgsdd_ctx->gas = *gas;
636   PetscCallCeed(ceed, CeedQFunctionContextRestoreDataRead(problem->apply_vol_ifunction.qfctx, &gas));
637   PetscCallCeed(ceed, CeedQFunctionContextCreate(honee->ceed, &sgs_dd_setup_data->sgsdd_qfctx));
638   PetscCallCeed(ceed,
639                 CeedQFunctionContextSetData(sgs_dd_setup_data->sgsdd_qfctx, CEED_MEM_HOST, CEED_USE_POINTER, sgsdd_ctx->total_bytes, sgsdd_ctx));
640   PetscCallCeed(ceed, CeedQFunctionContextSetDataDestroy(sgs_dd_setup_data->sgsdd_qfctx, CEED_MEM_HOST, FreeContextPetsc));
641 
642   PetscCallCeed(ceed, CeedQFunctionContextReferenceCopy(problem->apply_vol_ifunction.qfctx, &sgs_dd_setup_data->ifunction_qfctx));
643 
644   // -- Compute and store anisotropy tensor
645   PetscCall(GridAnisotropyTensorProjectionSetupApply(ceed, honee, &sgs_dd_setup_data->elem_restr_grid_aniso, &sgs_dd_setup_data->grid_aniso_ceed));
646 
647   // -- Create Nodal Evaluation Operator
648   switch (sgs_dd_setup_data->sgs_dd_model_implementation) {
649     case SGS_MODEL_DD_FUSED:
650       PetscCall(SgsDDSetupNodalEvaluation_Fused(ceed, honee, sgs_dd_setup_data));
651       break;
652     case SGS_MODEL_DD_SEQENTIAL_CEED:
653     case SGS_MODEL_DD_SEQENTIAL_TORCH:
654       PetscCall(SgsDDSetupNodalEvaluation_Sequential(ceed, honee, sgs_dd_setup_data));
655       break;
656   }
657 
658   // -- Create Operator to evalutate residual of SGS stress
659   PetscCall(SgsSetupNodalIFunction(ceed, honee, sgs_dd_setup_data));
660 
661   PetscCall(SgsDDSetupDataDestroy(sgs_dd_setup_data));
662   PetscFunctionReturn(PETSC_SUCCESS);
663 }
664 
665 PetscErrorCode SgsDDDataDestroy(SgsDDData sgs_dd_data) {
666   PetscFunctionBeginUser;
667   if (!sgs_dd_data) PetscFunctionReturn(PETSC_SUCCESS);
668   Ceed ceed = sgs_dd_data->op_sgs_apply_ctx->ceed;
669 
670   PetscCallCeed(ceed, CeedVectorDestroy(&sgs_dd_data->sgs_nodal_ceed));
671   PetscCallCeed(ceed, CeedVectorDestroy(&sgs_dd_data->grad_velo_ceed));
672   PetscCall(OperatorApplyContextDestroy(sgs_dd_data->op_nodal_evaluation_ctx));
673   PetscCall(OperatorApplyContextDestroy(sgs_dd_data->op_sgs_apply_ctx));
674   PetscCall(OperatorApplyContextDestroy(sgs_dd_data->op_nodal_dd_inputs_ctx));
675   PetscCall(OperatorApplyContextDestroy(sgs_dd_data->op_nodal_dd_outputs_ctx));
676   PetscCall(DMDestroy(&sgs_dd_data->dm_sgs));
677   PetscCall(DMDestroy(&sgs_dd_data->dm_dd_inputs));
678   PetscCall(DMDestroy(&sgs_dd_data->dm_dd_outputs));
679   if (sgs_dd_data->sgs_nodal_inference_ctx) PetscCall(sgs_dd_data->sgs_nodal_inference_ctx_destroy(sgs_dd_data->sgs_nodal_inference_ctx));
680   PetscCall(PetscFree(sgs_dd_data));
681   PetscFunctionReturn(PETSC_SUCCESS);
682 }
683