1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed/jit-source/hip/hip-types.h> 11 #include <stddef.h> 12 #include <hip/hiprtc.h> 13 14 #include "../hip/ceed-hip-common.h" 15 #include "../hip/ceed-hip-compile.h" 16 #include "ceed-hip-gen-operator-build.h" 17 #include "ceed-hip-gen.h" 18 19 //------------------------------------------------------------------------------ 20 // Destroy operator 21 //------------------------------------------------------------------------------ 22 static int CeedOperatorDestroy_Hip_gen(CeedOperator op) { 23 Ceed ceed; 24 CeedOperator_Hip_gen *impl; 25 bool is_composite; 26 27 CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 28 CeedCallBackend(CeedOperatorGetData(op, &impl)); 29 CeedCallBackend(CeedOperatorIsComposite(op, &is_composite)); 30 if (is_composite) { 31 CeedInt num_suboperators; 32 33 CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators)); 34 for (CeedInt i = 0; i < num_suboperators; i++) { 35 if (impl->streams[i]) CeedCallHip(ceed, hipStreamDestroy(impl->streams[i])); 36 impl->streams[i] = NULL; 37 } 38 } 39 if (impl->module) CeedCallHip(ceed, hipModuleUnload(impl->module)); 40 if (impl->module_assemble_full) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_full)); 41 if (impl->module_assemble_diagonal) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_diagonal)); 42 if (impl->module_assemble_qfunction) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_qfunction)); 43 if (impl->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)impl->points.num_per_elem)); 44 CeedCallBackend(CeedFree(&impl)); 45 CeedCallBackend(CeedDestroy(&ceed)); 46 return CEED_ERROR_SUCCESS; 47 } 48 49 
//------------------------------------------------------------------------------
// Apply and add to output
//------------------------------------------------------------------------------
// Core of operator application: (re)build the fused device kernel if needed,
// point the kernel-argument structs at device arrays for every field, launch
// the kernel on `stream`, then restore all borrowed arrays.
// On build or launch failure, *is_run_good is set false and the operator is
// flagged for the reference fallback; the function itself still returns
// CEED_ERROR_SUCCESS so the caller can retry via the fallback path.
// `input_arr`/`output_arr` are the device arrays of the active vectors (may be
// NULL when there is no active input/output). `request` is currently unused.
static int CeedOperatorApplyAddCore_Hip_gen(CeedOperator op, hipStream_t stream, const CeedScalar *input_arr, CeedScalar *output_arr,
                                            bool *is_run_good, CeedRequest *request) {
  bool                   is_at_points, is_tensor;
  Ceed                   ceed;
  CeedInt                num_elem, num_input_fields, num_output_fields;
  CeedEvalMode           eval_mode;
  CeedQFunctionField    *qf_input_fields, *qf_output_fields;
  CeedQFunction_Hip_gen *qf_data;
  CeedQFunction          qf;
  CeedOperatorField     *op_input_fields, *op_output_fields;
  CeedOperator_Hip_gen  *data;

  // Creation of the operator
  CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, is_run_good));
  if (!(*is_run_good)) return CEED_ERROR_SUCCESS;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));
  CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
  CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
  CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
  CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
  CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

  // Input vectors
  // Active inputs use the caller-provided device array; passive inputs borrow a
  // read-only device array that is restored after the launch below.
  for (CeedInt i = 0; i < num_input_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      data->fields.inputs[i] = NULL;
    } else {
      bool       is_active;
      CeedVector vec;

      // Get input vector
      CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (is_active) data->fields.inputs[i] = input_arr;
      else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Output vectors
  for (CeedInt i = 0; i < num_output_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      data->fields.outputs[i] = NULL;
    } else {
      bool       is_active;
      CeedVector vec;

      // Get output vector
      CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (is_active) data->fields.outputs[i] = output_arr;
      else CeedCallBackend(CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.outputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Point coordinates, if needed
  CeedCallBackend(CeedOperatorIsAtPoints(op, &is_at_points));
  if (is_at_points) {
    // Coords
    CeedVector vec;

    CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
    CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
    CeedCallBackend(CeedVectorDestroy(&vec));

    // Points per elem
    // Re-upload the per-element point counts only when the element count
    // changed since the last apply (cached on data->points.num_elem)
    if (num_elem != data->points.num_elem) {
      CeedInt            *points_per_elem;
      const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
      CeedElemRestriction rstr_points = NULL;

      data->points.num_elem = num_elem;
      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
      CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
      for (CeedInt e = 0; e < num_elem; e++) {
        CeedInt num_points_elem;

        CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
        points_per_elem[e] = num_points_elem;
      }
      if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
      CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
      CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
      CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
      CeedCallBackend(CeedFree(&points_per_elem));
    }
  }

  // Get context data
  CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

  // Apply operator
  void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points};

  CeedCallBackend(CeedOperatorHasTensorBases(op, &is_tensor));
  // block_sizes = {x, y, z}; z (elements per block) is filled in below
  CeedInt block_sizes[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1};

  if (is_tensor) {
    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    if (is_at_points) block_sizes[2] = 1;
  } else {
    // Non-tensor: cap threads per block at ~256; guarantee at least one element per block
    CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64;

    elems_per_block = elems_per_block > 0 ? elems_per_block : 1;
    block_sizes[2]  = elems_per_block;
  }
  // grid = ceil(num_elem / elems_per_block); shared memory holds one
  // thread_1d (or thread_1d^2 for dim >= 2) scratch slab per element in the block
  if (data->dim == 1 || !is_tensor) {
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(
        CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, is_run_good, opargs));
  } else if (data->dim == 2) {
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(
        CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, is_run_good, opargs));
  } else if (data->dim == 3) {
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(
        CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, is_run_good, opargs));
  }

  // Restore input arrays
  for (CeedInt i = 0; i < num_input_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
    } else {
      bool       is_active;
      CeedVector vec;

      CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Restore output arrays
  for (CeedInt i = 0; i < num_output_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
    } else {
      bool       is_active;
      CeedVector vec;

      CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (!is_active) CeedCallBackend(CeedVectorRestoreArray(vec, &data->fields.outputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Restore point coordinates, if needed
  if (is_at_points) {
    CeedVector vec;

    CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
    CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
    CeedCallBackend(CeedVectorDestroy(&vec));
  }

  // Restore context data
  CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

  // Cleanup
  CeedCallBackend(CeedDestroy(&ceed));
  CeedCallBackend(CeedQFunctionDestroy(&qf));
  // A failed launch marks the operator so future applies go straight to fallback
  if (!(*is_run_good)) data->use_fallback = true;
  return CEED_ERROR_SUCCESS;
}

static int
CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
  // Single (non-composite) apply-add: borrow device arrays for the active
  // vectors, run the fused kernel on the default stream (NULL), and fall back
  // to the /gpu/hip/ref implementation when the build or launch failed.
  bool              is_run_good = false;
  const CeedScalar *input_arr   = NULL;
  CeedScalar       *output_arr  = NULL;

  // Try to run kernel
  if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr));
  if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr));
  CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(op, NULL, input_arr, output_arr, &is_run_good, request));
  if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr));
  if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArray(output_vec, &output_arr));

  // Fallback on unsuccessful run
  if (!is_run_good) {
    CeedOperator op_fallback;

    CeedDebug256(CeedOperatorReturnCeed(op), CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request));
  }
  return CEED_ERROR_SUCCESS;
}

// Composite apply-add: run each non-empty sub-operator on its own lazily
// created HIP stream, synchronize the successful streams, then re-run any
// failed sub-operator through its /gpu/hip/ref fallback.
static int CeedOperatorApplyAddComposite_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
  bool                  is_run_good[CEED_COMPOSITE_MAX] = {false};
  CeedInt               num_suboperators;
  const CeedScalar     *input_arr  = NULL;
  CeedScalar           *output_arr = NULL;
  Ceed                  ceed;
  CeedOperator_Hip_gen *impl;
  CeedOperator         *sub_operators;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &impl));
  CeedCallBackend(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
  CeedCallBackend(CeedCompositeOperatorGetSubList(op, &sub_operators));
  if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr));
  if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr));
  for (CeedInt i = 0; i < num_suboperators; i++) {
    CeedInt num_elem = 0;

    CeedCallBackend(CeedOperatorGetNumElements(sub_operators[i], &num_elem));
    if (num_elem > 0) {
      // Create the per-sub-operator stream on first use; destroyed in operator destroy
      if (!impl->streams[i]) CeedCallHip(ceed, hipStreamCreate(&impl->streams[i]));
      CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(sub_operators[i], impl->streams[i], input_arr, output_arr, &is_run_good[i], request));
    } else {
      // Empty sub-operator contributes nothing; count it as successful
      is_run_good[i] = true;
    }
  }

  for (CeedInt i = 0; i < num_suboperators; i++) {
    if (impl->streams[i]) {
      if (is_run_good[i]) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[i]));
    }
  }
  if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr));
  if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArray(output_vec, &output_arr));
  CeedCallHip(ceed, hipDeviceSynchronize());

  // Fallback on unsuccessful run
  for (CeedInt i = 0; i < num_suboperators; i++) {
    if (!is_run_good[i]) {
      CeedOperator op_fallback;

      CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator");
      CeedCallBackend(CeedOperatorGetFallback(sub_operators[i], &op_fallback));
      CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request));
    }
  }
  CeedCallBackend(CeedDestroy(&ceed));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// QFunction assembly
//------------------------------------------------------------------------------
// Shared core for QFunction assembly (build + update paths). When
// `build_objects` is true, also creates the output strided restriction and the
// assembled vector. On any build/run failure, marks the operator for the
// assembly fallback and delegates to /gpu/hip/ref.
static int CeedOperatorLinearAssembleQFunctionCore_Hip_gen(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
                                                           CeedRequest *request) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *data;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));

  // Build the assembly kernel
  // Built once and cached on data->assemble_qfunction; a failed build
  // permanently routes this operator to the assembly fallback
  if (!data->assemble_qfunction && !data->use_assembly_fallback) {
    bool is_build_good = false;

    CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good));
    if (is_build_good) CeedCallBackend(CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen(op, &is_build_good));
    if (!is_build_good) data->use_assembly_fallback = true;
  }

  // Try assembly
  if (!data->use_assembly_fallback) {
    bool                   is_run_good = true;
    Ceed_Hip              *hip_data;
    CeedInt                num_elem, num_input_fields, num_output_fields;
    CeedEvalMode           eval_mode;
    CeedScalar            *assembled_array;
    CeedQFunctionField    *qf_input_fields, *qf_output_fields;
    CeedQFunction_Hip_gen *qf_data;
    CeedQFunction          qf;
    CeedOperatorField     *op_input_fields, *op_output_fields;

    CeedCallBackend(CeedGetData(ceed, &hip_data));
    CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
    CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
    CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
    CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
    CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

    // Input vectors
    // Active inputs get NULL here — the assembly kernel supplies unit vectors
    // for active fields; only passive inputs borrow real device arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
        data->fields.inputs[i] = NULL;
      } else {
        bool       is_active;
        CeedVector vec;

        // Get input vector
        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (is_active) data->fields.inputs[i] = NULL;
        else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Get context data
    CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

    // Build objects if needed
    if (build_objects) {
      CeedInt qf_size_in = 0, qf_size_out = 0, Q;

      // Count number of active input fields
      {
        for (CeedInt i = 0; i < num_input_fields; i++) {
          CeedInt    field_size;
          CeedVector vec;

          // Get input vector
          CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
          // Check if active input
          if (vec == CEED_VECTOR_ACTIVE) {
            CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size));
            qf_size_in += field_size;
          }
          CeedCallBackend(CeedVectorDestroy(&vec));
        }
        CeedCheck(qf_size_in > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
      }

      // Count number of active output fields
      {
        for (CeedInt i = 0; i < num_output_fields; i++) {
          CeedInt    field_size;
          CeedVector vec;

          // Get output vector
          CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
          // Check if active output
          if (vec == CEED_VECTOR_ACTIVE) {
            CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size));
            qf_size_out += field_size;
          }
          CeedCallBackend(CeedVectorDestroy(&vec));
        }
        CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
      }
      CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));

      // Actually build objects now
      const CeedSize l_size     = (CeedSize)num_elem * Q * qf_size_in * qf_size_out;
      CeedInt        strides[3] = {1, num_elem * Q, Q}; /* *NOPAD* */

      // Create output restriction
      CeedCallBackend(CeedElemRestrictionCreateStrided(ceed, num_elem, Q, qf_size_in * qf_size_out,
                                                       (CeedSize)qf_size_in * (CeedSize)qf_size_out * (CeedSize)num_elem * (CeedSize)Q, strides,
                                                       rstr));
      // Create assembled vector
      CeedCallBackend(CeedVectorCreate(ceed, l_size, assembled));
    }

    // Assembly array
    CeedCallBackend(CeedVectorGetArrayWrite(*assembled, CEED_MEM_DEVICE, &assembled_array));

    // Assemble QFunction
    bool  is_tensor = false;
    void *opargs[]  = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points, &assembled_array};

    CeedCallBackend(CeedOperatorHasTensorBases(op, &is_tensor));
    CeedInt block_sizes[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1};

    if (is_tensor) {
      CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    } else {
      // Non-tensor: cap threads per block at ~256; guarantee at least one element per block
      CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64;

      elems_per_block = elems_per_block > 0 ? elems_per_block : 1;
      block_sizes[2]  = elems_per_block;
    }
    // grid = ceil(num_elem / elems_per_block); shared memory sized per element slab
    if (data->dim == 1 || !is_tensor) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 2) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 3) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    }

    // Restore input arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      } else {
        bool       is_active;
        CeedVector vec;

        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Restore context data
    CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

    // Restore assembly array
    CeedCallBackend(CeedVectorRestoreArray(*assembled, &assembled_array));

    // Cleanup
    CeedCallBackend(CeedQFunctionDestroy(&qf));
    if (!is_run_good) {
      data->use_assembly_fallback = true;
      // Discard half-built objects so the fallback path below rebuilds them
      if (build_objects) {
        CeedCallBackend(CeedVectorDestroy(assembled));
        CeedCallBackend(CeedElemRestrictionDestroy(rstr));
      }
    }
  }
  CeedCallBackend(CeedDestroy(&ceed));

  // Fallback, if needed
  if (data->use_assembly_fallback) {
    CeedOperator op_fallback;

    CeedDebug256(CeedOperatorReturnCeed(op), CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorFallbackLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
    return CEED_ERROR_SUCCESS;
  }
  return CEED_ERROR_SUCCESS;
}

// Build path: create the assembled vector and restriction, then assemble
static int CeedOperatorLinearAssembleQFunction_Hip_gen(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
                                                       CeedRequest *request) {
  return CeedOperatorLinearAssembleQFunctionCore_Hip_gen(op, true, assembled, rstr, request);
}

// Update path: refill a previously assembled QFunction representation in place
static int CeedOperatorLinearAssembleQFunctionUpdate_Hip_gen(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
  return CeedOperatorLinearAssembleQFunctionCore_Hip_gen(op, false, &assembled, &rstr, request);
}

//------------------------------------------------------------------------------
// AtPoints diagonal assembly
//------------------------------------------------------------------------------
// Add the operator diagonal into `assembled` for AtPoints operators using a
// dedicated JIT kernel; falls back to /gpu/hip/ref when the kernel cannot be
// built (e.g. mismatched active bases) or fails to run.
static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen(CeedOperator op, CeedVector assembled, CeedRequest *request) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *data;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));

  // Build the assembly kernel
  if (!data->assemble_diagonal && !data->use_assembly_fallback) {
    bool                     is_build_good = false;
    CeedInt                  num_active_bases_in, num_active_bases_out;
    CeedOperatorAssemblyData assembly_data;

    CeedCallBackend(CeedOperatorGetOperatorAssemblyData(op, &assembly_data));
    CeedCallBackend(
        CeedOperatorAssemblyDataGetEvalModes(assembly_data, &num_active_bases_in, NULL, NULL, NULL, &num_active_bases_out, NULL, NULL, NULL, NULL));
    // Kernel only supports matching input/output active basis counts
    if (num_active_bases_in == num_active_bases_out) {
      CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good));
      if (is_build_good) CeedCallBackend(CeedOperatorBuildKernelDiagonalAssemblyAtPoints_Hip_gen(op, &is_build_good));
    }
    if (!is_build_good) data->use_assembly_fallback = true;
  }

  // Try assembly
  if (!data->use_assembly_fallback) {
    bool                   is_run_good = true;
    Ceed_Hip              *hip_data;
    CeedInt                num_elem, num_input_fields, num_output_fields;
    CeedEvalMode           eval_mode;
    CeedScalar            *assembled_array;
    CeedQFunctionField    *qf_input_fields, *qf_output_fields;
    CeedQFunction_Hip_gen *qf_data;
    CeedQFunction          qf;
    CeedOperatorField     *op_input_fields, *op_output_fields;

    CeedCallBackend(CeedGetData(ceed, &hip_data));
    CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
    CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
    CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
    CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
    CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

    // Input vectors
    // Active inputs get NULL (handled inside the assembly kernel); passive
    // inputs borrow read-only device arrays restored after the launch
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
        data->fields.inputs[i] = NULL;
      } else {
        bool       is_active;
        CeedVector vec;

        // Get input vector
        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (is_active) data->fields.inputs[i] = NULL;
        else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));

      // Points per elem
      // Re-upload per-element point counts only when the element count changed
      if (num_elem != data->points.num_elem) {
        CeedInt            *points_per_elem;
        const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
        CeedElemRestriction rstr_points = NULL;

        data->points.num_elem = num_elem;
        CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
        CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
        for (CeedInt e = 0; e < num_elem; e++) {
          CeedInt num_points_elem;

          CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
          points_per_elem[e] = num_points_elem;
        }
        if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
        CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
        CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
        CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
        CeedCallBackend(CeedFree(&points_per_elem));
      }
    }

    // Get context data
    CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

    // Assembly array
    CeedCallBackend(CeedVectorGetArray(assembled, CEED_MEM_DEVICE, &assembled_array));

    // Assemble diagonal
    void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points, &assembled_array};

    CeedInt block_sizes[3] = {data->thread_1d, (data->dim == 1 ? 1 : data->thread_1d), -1};

    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    // One element per block for AtPoints assembly
    block_sizes[2] = 1;
    if (data->dim == 1) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 2) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 3) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    }
    CeedCallHip(ceed, hipDeviceSynchronize());

    // Restore input arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      } else {
        bool       is_active;
        CeedVector vec;

        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Restore point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }

    // Restore context data
    CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

    // Restore assembly array
    CeedCallBackend(CeedVectorRestoreArray(assembled, &assembled_array));

    // Cleanup
    CeedCallBackend(CeedQFunctionDestroy(&qf));
    if (!is_run_good) data->use_assembly_fallback = true;
  }
  CeedCallBackend(CeedDestroy(&ceed));

  // Fallback, if needed
  if (data->use_assembly_fallback) {
    CeedOperator op_fallback;

    CeedDebug256(CeedOperatorReturnCeed(op), CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
    return CEED_ERROR_SUCCESS;
  }
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// AtPoints full assembly
//------------------------------------------------------------------------------
// Assemble the full element matrices for an AtPoints operator into `assembled`
// starting at `offset`, using a dedicated JIT kernel; falls back to the
// reference implementation when build or run fails.
static int CeedSingleOperatorAssembleAtPoints_Hip_gen(CeedOperator op, CeedInt offset, CeedVector assembled) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *data;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));

  // Build the assembly kernel
  if (!data->assemble_full && !data->use_assembly_fallback) {
    bool                     is_build_good = false;
    CeedInt                  num_active_bases_in, num_active_bases_out;
    CeedOperatorAssemblyData assembly_data;

    CeedCallBackend(CeedOperatorGetOperatorAssemblyData(op, &assembly_data));
    CeedCallBackend(
        CeedOperatorAssemblyDataGetEvalModes(assembly_data, &num_active_bases_in, NULL, NULL, NULL, &num_active_bases_out, NULL, NULL, NULL, NULL));
    // Kernel only supports matching input/output active basis counts
    if (num_active_bases_in == num_active_bases_out) {
      CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good));
      if (is_build_good) CeedCallBackend(CeedOperatorBuildKernelFullAssemblyAtPoints_Hip_gen(op, &is_build_good));
    }
    if (!is_build_good) {
      CeedDebug(ceed, "Single Operator Assemble at Points compile failed, using fallback\n");
      data->use_assembly_fallback = true;
    }
  }

  // Try assembly
  if (!data->use_assembly_fallback) {
    bool                   is_run_good = true;
    Ceed_Hip              *Hip_data;
    CeedInt                num_elem, num_input_fields, num_output_fields;
    CeedEvalMode           eval_mode;
    CeedScalar            *assembled_array;
    CeedQFunctionField    *qf_input_fields, *qf_output_fields;
    CeedQFunction_Hip_gen *qf_data;
    CeedQFunction          qf;
    CeedOperatorField     *op_input_fields, *op_output_fields;

    CeedCallBackend(CeedGetData(ceed, &Hip_data));
    CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
    CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
    CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
    CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
    CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
    CeedDebug(ceed, "Running single operator assemble for /gpu/hip/gen\n");

    // Input vectors
    // Active inputs get NULL (handled inside the assembly kernel); passive
    // inputs borrow read-only device arrays restored after the launch
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
        data->fields.inputs[i] = NULL;
      } else {
        bool       is_active;
        CeedVector vec;

        // Get input vector
        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (is_active) data->fields.inputs[i] = NULL;
        else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));

      // Points per elem
      // Re-upload per-element point counts only when the element count changed
      if (num_elem != data->points.num_elem) {
        CeedInt            *points_per_elem;
        const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
        CeedElemRestriction rstr_points = NULL;

        data->points.num_elem = num_elem;
        CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
        CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
        for (CeedInt e = 0; e < num_elem; e++) {
          CeedInt num_points_elem;

          CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
          points_per_elem[e] = num_points_elem;
        }
        if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
        CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
        CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
        CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
        CeedCallBackend(CeedFree(&points_per_elem));
      }
    }

    // Get context data
    CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

    // Assembly array
    CeedCallBackend(CeedVectorGetArray(assembled, CEED_MEM_DEVICE, &assembled_array));
    // Kernel writes starting at the caller-provided offset into the assembled vector
    CeedScalar *assembled_offset_array = &assembled_array[offset];

    // Assemble diagonal
    void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B,
                      &data->G,          &data->W,      &data->points,  &assembled_offset_array};

    CeedInt block_sizes[3] = {data->thread_1d, (data->dim == 1 ? 1 : data->thread_1d), -1};

    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    // One element per block for AtPoints assembly
    block_sizes[2] = 1;
    if (data->dim == 1) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_full, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                    &is_run_good, opargs));
    } else if (data->dim == 2) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_full, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                    &is_run_good, opargs));
    } else if (data->dim == 3) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_full, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                    &is_run_good, opargs));
    }
    CeedCallHip(ceed, hipDeviceSynchronize());

    // Restore input arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      } else {
        bool       is_active;
        CeedVector vec;

        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Restore point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }

    // Restore context data
    CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

    // Restore assembly array
    CeedCallBackend(CeedVectorRestoreArray(assembled, &assembled_array));

    // Cleanup
    CeedCallBackend(CeedQFunctionDestroy(&qf));
    if (!is_run_good) {
      CeedDebug(ceed, "Single Operator Assemble at Points run failed, using fallback\n");
      data->use_assembly_fallback
= true; 854 } 855 } 856 CeedCallBackend(CeedDestroy(&ceed)); 857 858 // Fallback, if needed 859 if (data->use_assembly_fallback) { 860 CeedOperator op_fallback; 861 862 CeedDebug256(CeedOperatorReturnCeed(op), CEED_DEBUG_COLOR_SUCCESS, "Falling back to /gpu/hip/ref CeedOperator"); 863 CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback)); 864 CeedCallBackend(CeedSingleOperatorAssemble(op_fallback, offset, assembled)); 865 return CEED_ERROR_SUCCESS; 866 } 867 return CEED_ERROR_SUCCESS; 868 } 869 870 //------------------------------------------------------------------------------ 871 // Create operator 872 //------------------------------------------------------------------------------ 873 int CeedOperatorCreate_Hip_gen(CeedOperator op) { 874 bool is_composite, is_at_points; 875 Ceed ceed; 876 CeedOperator_Hip_gen *impl; 877 878 CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 879 CeedCallBackend(CeedCalloc(1, &impl)); 880 CeedCallBackend(CeedOperatorSetData(op, impl)); 881 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 882 if (is_composite) { 883 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAddComposite", CeedOperatorApplyAddComposite_Hip_gen)); 884 } else { 885 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Hip_gen)); 886 } 887 CeedCall(CeedOperatorIsAtPoints(op, &is_at_points)); 888 if (is_at_points) { 889 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen)); 890 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleSingle", CeedSingleOperatorAssembleAtPoints_Hip_gen)); 891 } 892 if (!is_at_points) { 893 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunction_Hip_gen)); 894 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", 
CeedOperatorLinearAssembleQFunctionUpdate_Hip_gen)); 895 } 896 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Hip_gen)); 897 CeedCallBackend(CeedDestroy(&ceed)); 898 return CEED_ERROR_SUCCESS; 899 } 900 901 //------------------------------------------------------------------------------ 902