1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed/jit-source/hip/hip-types.h> 11 #include <stddef.h> 12 #include <hip/hiprtc.h> 13 14 #include "../hip/ceed-hip-common.h" 15 #include "../hip/ceed-hip-compile.h" 16 #include "ceed-hip-gen-operator-build.h" 17 #include "ceed-hip-gen.h" 18 19 //------------------------------------------------------------------------------ 20 // Destroy operator 21 //------------------------------------------------------------------------------ 22 static int CeedOperatorDestroy_Hip_gen(CeedOperator op) { 23 Ceed ceed; 24 CeedOperator_Hip_gen *impl; 25 bool is_composite; 26 27 CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 28 CeedCallBackend(CeedOperatorGetData(op, &impl)); 29 CeedCallBackend(CeedOperatorIsComposite(op, &is_composite)); 30 if (is_composite) { 31 CeedInt num_suboperators; 32 33 CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators)); 34 for (CeedInt i = 0; i < num_suboperators; i++) { 35 if (impl->streams[i]) CeedCallHip(ceed, hipStreamDestroy(impl->streams[i])); 36 impl->streams[i] = NULL; 37 } 38 } 39 if (impl->module) CeedCallHip(ceed, hipModuleUnload(impl->module)); 40 if (impl->module_assemble_full) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_full)); 41 if (impl->module_assemble_diagonal) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_diagonal)); 42 if (impl->module_assemble_qfunction) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_qfunction)); 43 if (impl->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)impl->points.num_per_elem)); 44 CeedCallBackend(CeedFree(&impl)); 45 CeedCallBackend(CeedDestroy(&ceed)); 46 return CEED_ERROR_SUCCESS; 47 } 48 49 
//------------------------------------------------------------------------------
// Apply and add to output
//
// Core apply routine shared by the single-operator and composite paths.
// Builds (or reuses) the fused gen kernel, wires device pointers for all
// input/output fields into `data->fields`, launches the kernel on `stream`
// (NULL selects the default stream), then restores all borrowed arrays.
// On any kernel build/launch failure *is_run_good is false and the caller is
// expected to fall back; this function itself still returns success.
// NOTE: `request` is currently unused in this core routine.
//------------------------------------------------------------------------------
static int CeedOperatorApplyAddCore_Hip_gen(CeedOperator op, hipStream_t stream, const CeedScalar *input_arr, CeedScalar *output_arr,
                                            bool *is_run_good, CeedRequest *request) {
  bool                   is_at_points, is_tensor;
  Ceed                   ceed;
  CeedInt                num_elem, num_input_fields, num_output_fields;
  CeedEvalMode           eval_mode;
  CeedQFunctionField    *qf_input_fields, *qf_output_fields;
  CeedQFunction_Hip_gen *qf_data;
  CeedQFunction          qf;
  CeedOperatorField     *op_input_fields, *op_output_fields;
  CeedOperator_Hip_gen  *data;

  // Creation of the operator kernel; bail out (cleanly) so the caller can use the fallback
  CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, is_run_good));
  if (!(*is_run_good)) return CEED_ERROR_SUCCESS;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));
  CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
  CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
  CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
  CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
  CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

  // Input vectors: active fields use the caller-provided device array, passive fields borrow a read-only device array
  for (CeedInt i = 0; i < num_input_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip; weights are generated in the kernel
      data->fields.inputs[i] = NULL;
    } else {
      bool       is_active;
      CeedVector vec;

      // Get input vector
      CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (is_active) data->fields.inputs[i] = input_arr;
      else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Output vectors: same pattern, but passive fields are borrowed read-write
  for (CeedInt i = 0; i < num_output_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      data->fields.outputs[i] = NULL;
    } else {
      bool       is_active;
      CeedVector vec;

      // Get output vector
      CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (is_active) data->fields.outputs[i] = output_arr;
      else CeedCallBackend(CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.outputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Point coordinates, if needed
  CeedCallBackend(CeedOperatorIsAtPoints(op, &is_at_points));
  if (is_at_points) {
    // Coords
    CeedVector vec;

    CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
    CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
    CeedCallBackend(CeedVectorDestroy(&vec));

    // Points per elem: (re)build the cached device array only when the element count changed
    if (num_elem != data->points.num_elem) {
      CeedInt            *points_per_elem;
      const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
      CeedElemRestriction rstr_points = NULL;

      data->points.num_elem = num_elem;
      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
      CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
      for (CeedInt e = 0; e < num_elem; e++) {
        CeedInt num_points_elem;

        CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
        points_per_elem[e] = num_points_elem;
      }
      // Host staging buffer -> fresh device allocation
      if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
      CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
      CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
      CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
      CeedCallBackend(CeedFree(&points_per_elem));
    }
  }

  // Get context data
  CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

  // Apply operator
  void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points};

  CeedCallBackend(CeedOperatorHasTensorBases(op, &is_tensor));
  CeedInt block_sizes[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1};

  if (is_tensor) {
    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
  } else {
    // Non-tensor: cap the block at roughly 256 threads, with at least one element per block
    CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64;

    elems_per_block = elems_per_block > 0 ? elems_per_block : 1;
    block_sizes[2]  = elems_per_block;
  }
  // grid = ceil(num_elem / elems_per_block); shared memory is sized per element slab
  if (data->dim == 1 || !is_tensor) {
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                  is_run_good, opargs));
  } else if (data->dim == 2) {
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                  is_run_good, opargs));
  } else if (data->dim == 3) {
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                  is_run_good, opargs));
  }

  // Restore input arrays (only the passive ones were borrowed above)
  for (CeedInt i = 0; i < num_input_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
    } else {
      bool       is_active;
      CeedVector vec;

      CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Restore output arrays
  for (CeedInt i = 0; i < num_output_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
    } else {
      bool       is_active;
      CeedVector vec;

      CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (!is_active) CeedCallBackend(CeedVectorRestoreArray(vec, &data->fields.outputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Restore point coordinates, if needed
  if (is_at_points) {
    CeedVector vec;

    CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
    CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
    CeedCallBackend(CeedVectorDestroy(&vec));
  }

  // Restore context data
  CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

  // Cleanup
  CeedCallBackend(CeedDestroy(&ceed));
  CeedCallBackend(CeedQFunctionDestroy(&qf));
  // A failed launch routes all future applies through the reference fallback
  if (!(*is_run_good)) data->use_fallback = true;
  return CEED_ERROR_SUCCESS;
}

static int
CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) { 232 bool is_run_good = false; 233 const CeedScalar *input_arr = NULL; 234 CeedScalar *output_arr = NULL; 235 236 // Try to run kernel 237 if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr)); 238 if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr)); 239 CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(op, NULL, input_arr, output_arr, &is_run_good, request)); 240 if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr)); 241 if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArray(output_vec, &output_arr)); 242 243 // Fallback on unsuccessful run 244 if (!is_run_good) { 245 CeedOperator op_fallback; 246 247 CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref CeedOperator for ApplyAdd\n"); 248 CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback)); 249 CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request)); 250 } 251 return CEED_ERROR_SUCCESS; 252 } 253 254 static int CeedOperatorApplyAddComposite_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) { 255 bool is_run_good[CEED_COMPOSITE_MAX] = {false}, is_sequential; 256 CeedInt num_suboperators; 257 const CeedScalar *input_arr = NULL; 258 CeedScalar *output_arr = NULL; 259 Ceed ceed; 260 CeedOperator_Hip_gen *impl; 261 CeedOperator *sub_operators; 262 263 CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 264 CeedCallBackend(CeedOperatorGetData(op, &impl)); 265 CeedCallBackend(CeedOperatorCompositeGetNumSub(op, &num_suboperators)); 266 CeedCallBackend(CeedOperatorCompositeGetSubList(op, &sub_operators)); 267 CeedCall(CeedOperatorCompositeIsSequential(op, &is_sequential)); 268 if (input_vec != CEED_VECTOR_NONE) 
CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr)); 269 if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr)); 270 for (CeedInt i = 0; i < num_suboperators; i++) { 271 CeedInt num_elem = 0; 272 const CeedInt stream_index = is_sequential ? 0 : i; 273 274 CeedCallBackend(CeedOperatorGetNumElements(sub_operators[i], &num_elem)); 275 if (num_elem > 0) { 276 if (!impl->streams[stream_index]) CeedCallHip(ceed, hipStreamCreate(&impl->streams[stream_index])); 277 CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(sub_operators[i], impl->streams[stream_index], input_arr, output_arr, &is_run_good[i], 278 request)); 279 } else { 280 is_run_good[i] = true; 281 } 282 } 283 if (is_sequential) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[0])); 284 else { 285 for (CeedInt i = 0; i < num_suboperators; i++) { 286 if (impl->streams[i]) { 287 if (is_run_good[i]) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[i])); 288 } 289 } 290 } 291 if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr)); 292 if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArray(output_vec, &output_arr)); 293 CeedCallHip(ceed, hipDeviceSynchronize()); 294 295 // Fallback on unsuccessful run 296 for (CeedInt i = 0; i < num_suboperators; i++) { 297 if (!is_run_good[i]) { 298 CeedOperator op_fallback; 299 300 CeedDebug(ceed, "\nFalling back to /gpu/hip/ref CeedOperator for ApplyAdd\n"); 301 CeedCallBackend(CeedOperatorGetFallback(sub_operators[i], &op_fallback)); 302 CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request)); 303 } 304 } 305 CeedCallBackend(CeedDestroy(&ceed)); 306 return CEED_ERROR_SUCCESS; 307 } 308 309 //------------------------------------------------------------------------------ 310 // QFunction assembly 311 //------------------------------------------------------------------------------ 312 static int 
CeedOperatorLinearAssembleQFunctionCore_Hip_gen(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr, 313 CeedRequest *request) { 314 Ceed ceed; 315 CeedOperator_Hip_gen *data; 316 317 CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 318 CeedCallBackend(CeedOperatorGetData(op, &data)); 319 320 // Build the assembly kernel 321 if (!data->assemble_qfunction && !data->use_assembly_fallback) { 322 bool is_build_good = false; 323 324 CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good)); 325 if (is_build_good) CeedCallBackend(CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen(op, &is_build_good)); 326 if (!is_build_good) data->use_assembly_fallback = true; 327 } 328 329 // Try assembly 330 if (!data->use_assembly_fallback) { 331 bool is_run_good = true; 332 Ceed_Hip *hip_data; 333 CeedInt num_elem, num_input_fields, num_output_fields; 334 CeedEvalMode eval_mode; 335 CeedScalar *assembled_array; 336 CeedQFunctionField *qf_input_fields, *qf_output_fields; 337 CeedQFunction_Hip_gen *qf_data; 338 CeedQFunction qf; 339 CeedOperatorField *op_input_fields, *op_output_fields; 340 341 CeedCallBackend(CeedGetData(ceed, &hip_data)); 342 CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 343 CeedCallBackend(CeedQFunctionGetData(qf, &qf_data)); 344 CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem)); 345 CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields)); 346 CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields)); 347 348 // Input vectors 349 for (CeedInt i = 0; i < num_input_fields; i++) { 350 CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); 351 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 352 data->fields.inputs[i] = NULL; 353 } else { 354 bool is_active; 355 CeedVector vec; 356 357 // Get input vector 358 CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 359 
is_active = vec == CEED_VECTOR_ACTIVE; 360 if (is_active) data->fields.inputs[i] = NULL; 361 else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i])); 362 CeedCallBackend(CeedVectorDestroy(&vec)); 363 } 364 } 365 366 // Get context data 367 CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c)); 368 369 // Build objects if needed 370 if (build_objects) { 371 CeedInt qf_size_in = 0, qf_size_out = 0, Q; 372 373 // Count number of active input fields 374 { 375 for (CeedInt i = 0; i < num_input_fields; i++) { 376 CeedInt field_size; 377 CeedVector vec; 378 379 // Get input vector 380 CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 381 // Check if active input 382 if (vec == CEED_VECTOR_ACTIVE) { 383 CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size)); 384 qf_size_in += field_size; 385 } 386 CeedCallBackend(CeedVectorDestroy(&vec)); 387 } 388 CeedCheck(qf_size_in > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); 389 } 390 391 // Count number of active output fields 392 { 393 for (CeedInt i = 0; i < num_output_fields; i++) { 394 CeedInt field_size; 395 CeedVector vec; 396 397 // Get output vector 398 CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); 399 // Check if active output 400 if (vec == CEED_VECTOR_ACTIVE) { 401 CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size)); 402 qf_size_out += field_size; 403 } 404 CeedCallBackend(CeedVectorDestroy(&vec)); 405 } 406 CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs"); 407 } 408 CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q)); 409 410 // Actually build objects now 411 const CeedSize l_size = (CeedSize)num_elem * Q * qf_size_in * qf_size_out; 412 CeedInt strides[3] = {1, num_elem * Q, Q}; /* *NOPAD* */ 413 414 // Create output restriction 415 
CeedCallBackend(CeedElemRestrictionCreateStrided(ceed, num_elem, Q, qf_size_in * qf_size_out, 416 (CeedSize)qf_size_in * (CeedSize)qf_size_out * (CeedSize)num_elem * (CeedSize)Q, strides, 417 rstr)); 418 // Create assembled vector 419 CeedCallBackend(CeedVectorCreate(ceed, l_size, assembled)); 420 } 421 422 // Assembly array 423 CeedCallBackend(CeedVectorGetArrayWrite(*assembled, CEED_MEM_DEVICE, &assembled_array)); 424 425 // Assemble QFunction 426 bool is_tensor = false; 427 void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points, &assembled_array}; 428 429 CeedCallBackend(CeedOperatorHasTensorBases(op, &is_tensor)); 430 CeedInt block_sizes[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1}; 431 432 if (is_tensor) { 433 CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes)); 434 } else { 435 CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64; 436 437 elems_per_block = elems_per_block > 0 ? elems_per_block : 1; 438 block_sizes[2] = elems_per_block; 439 } 440 if (data->dim == 1 || !is_tensor) { 441 CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0); 442 CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar); 443 444 CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], 445 sharedMem, &is_run_good, opargs)); 446 } else if (data->dim == 2) { 447 CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 
1 : 0); 448 CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar); 449 450 CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], 451 sharedMem, &is_run_good, opargs)); 452 } else if (data->dim == 3) { 453 CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0); 454 CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar); 455 456 CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], 457 sharedMem, &is_run_good, opargs)); 458 } 459 460 // Restore input arrays 461 for (CeedInt i = 0; i < num_input_fields; i++) { 462 CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); 463 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 464 } else { 465 bool is_active; 466 CeedVector vec; 467 468 CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 469 is_active = vec == CEED_VECTOR_ACTIVE; 470 if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i])); 471 CeedCallBackend(CeedVectorDestroy(&vec)); 472 } 473 } 474 475 // Restore context data 476 CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c)); 477 478 // Restore assembly array 479 CeedCallBackend(CeedVectorRestoreArray(*assembled, &assembled_array)); 480 481 // Cleanup 482 CeedCallBackend(CeedQFunctionDestroy(&qf)); 483 if (!is_run_good) { 484 data->use_assembly_fallback = true; 485 if (build_objects) { 486 CeedCallBackend(CeedVectorDestroy(assembled)); 487 CeedCallBackend(CeedElemRestrictionDestroy(rstr)); 488 } 489 } 490 } 491 CeedCallBackend(CeedDestroy(&ceed)); 492 493 // Fallback, if needed 494 if (data->use_assembly_fallback) { 495 CeedOperator op_fallback; 496 497 CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref 
CeedOperator for LineearAssembleQFunction\n"); 498 CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback)); 499 CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(op_fallback, assembled, rstr, request)); 500 return CEED_ERROR_SUCCESS; 501 } 502 return CEED_ERROR_SUCCESS; 503 } 504 505 static int CeedOperatorLinearAssembleQFunction_Hip_gen(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) { 506 return CeedOperatorLinearAssembleQFunctionCore_Hip_gen(op, true, assembled, rstr, request); 507 } 508 509 static int CeedOperatorLinearAssembleQFunctionUpdate_Hip_gen(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) { 510 return CeedOperatorLinearAssembleQFunctionCore_Hip_gen(op, false, &assembled, &rstr, request); 511 } 512 513 //------------------------------------------------------------------------------ 514 // AtPoints diagonal assembly 515 //------------------------------------------------------------------------------ 516 static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen(CeedOperator op, CeedVector assembled, CeedRequest *request) { 517 Ceed ceed; 518 CeedOperator_Hip_gen *data; 519 520 CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 521 CeedCallBackend(CeedOperatorGetData(op, &data)); 522 523 // Build the assembly kernel 524 if (!data->assemble_diagonal && !data->use_assembly_fallback) { 525 bool is_build_good = false; 526 CeedInt num_active_bases_in, num_active_bases_out; 527 CeedOperatorAssemblyData assembly_data; 528 529 CeedCallBackend(CeedOperatorGetOperatorAssemblyData(op, &assembly_data)); 530 CeedCallBackend(CeedOperatorAssemblyDataGetEvalModes(assembly_data, &num_active_bases_in, NULL, NULL, NULL, &num_active_bases_out, NULL, NULL, 531 NULL, NULL)); 532 if (num_active_bases_in == num_active_bases_out) { 533 CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good)); 534 if (is_build_good) 
CeedCallBackend(CeedOperatorBuildKernelDiagonalAssemblyAtPoints_Hip_gen(op, &is_build_good));
    }
    if (!is_build_good) data->use_assembly_fallback = true;
  }

  // Try assembly
  if (!data->use_assembly_fallback) {
    bool                   is_run_good = true;
    Ceed_Hip              *hip_data;
    CeedInt                num_elem, num_input_fields, num_output_fields;
    CeedEvalMode           eval_mode;
    CeedScalar            *assembled_array;
    CeedQFunctionField    *qf_input_fields, *qf_output_fields;
    CeedQFunction_Hip_gen *qf_data;
    CeedQFunction          qf;
    CeedOperatorField     *op_input_fields, *op_output_fields;

    CeedCallBackend(CeedGetData(ceed, &hip_data));
    CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
    CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
    CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
    CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
    CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

    // Input vectors; active inputs are handled inside the assembly kernel, so only passive inputs borrow device arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
        data->fields.inputs[i] = NULL;
      } else {
        bool       is_active;
        CeedVector vec;

        // Get input vector
        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (is_active) data->fields.inputs[i] = NULL;
        else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));

      // Points per elem: (re)build the cached device array only when the element count changed
      if (num_elem != data->points.num_elem) {
        CeedInt            *points_per_elem;
        const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
        CeedElemRestriction rstr_points = NULL;

        data->points.num_elem = num_elem;
        CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
        CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
        for (CeedInt e = 0; e < num_elem; e++) {
          CeedInt num_points_elem;

          CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
          points_per_elem[e] = num_points_elem;
        }
        // Host staging buffer -> fresh device allocation
        if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
        CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
        CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
        CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
        CeedCallBackend(CeedFree(&points_per_elem));
      }
    }

    // Get context data
    CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

    // Assembly array
    CeedCallBackend(CeedVectorGetArray(assembled, CEED_MEM_DEVICE, &assembled_array));

    // Assemble diagonal
    void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points, &assembled_array};

    CeedInt block_sizes[3] = {data->thread_1d, (data->dim == 1 ? 1 : data->thread_1d), -1};

    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    // One element per block for diagonal assembly
    block_sizes[2] = 1;
    // grid = ceil(num_elem / elems_per_block); launch on the default stream
    if (data->dim == 1) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 2) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 3) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    }
    // Block until assembly completes before restoring the borrowed arrays
    CeedCallHip(ceed, hipDeviceSynchronize());

    // Restore input arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      } else {
        bool       is_active;
        CeedVector vec;

        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Restore point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }

    // Restore context data
    CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

    // Restore assembly array
    CeedCallBackend(CeedVectorRestoreArray(assembled, &assembled_array));

    // Cleanup
    CeedCallBackend(CeedQFunctionDestroy(&qf));
    if (!is_run_good) data->use_assembly_fallback = true;
  }
  CeedCallBackend(CeedDestroy(&ceed));

  // Fallback, if needed
  if (data->use_assembly_fallback) {
    CeedOperator op_fallback;

    CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref CeedOperator for AtPoints LinearAssembleAddDiagonal\n");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
    return CEED_ERROR_SUCCESS;
  }
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// AtPoints full assembly
//------------------------------------------------------------------------------
static int CeedOperatorAssembleSingleAtPoints_Hip_gen(CeedOperator op, CeedInt offset, CeedVector assembled) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *data;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));

  // Build the assembly kernel
  if (!data->assemble_full && !data->use_assembly_fallback) {
    bool                     is_build_good = false;
    CeedInt                  num_active_bases_in, num_active_bases_out;
    CeedOperatorAssemblyData assembly_data;

    CeedCallBackend(CeedOperatorGetOperatorAssemblyData(op, &assembly_data));
    CeedCallBackend(CeedOperatorAssemblyDataGetEvalModes(assembly_data, &num_active_bases_in, NULL, NULL, NULL, &num_active_bases_out, NULL, NULL,
                                                         NULL, NULL));
    // Only square (inputs match outputs) cases are supported by the gen kernel
    if (num_active_bases_in == num_active_bases_out) {
      CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good));
      if (is_build_good)
CeedCallBackend(CeedOperatorBuildKernelFullAssemblyAtPoints_Hip_gen(op, &is_build_good)); 711 } 712 if (!is_build_good) { 713 CeedDebug(ceed, "Single Operator Assemble at Points compile failed, using fallback\n"); 714 data->use_assembly_fallback = true; 715 } 716 } 717 718 // Try assembly 719 if (!data->use_assembly_fallback) { 720 bool is_run_good = true; 721 Ceed_Hip *Hip_data; 722 CeedInt num_elem, num_input_fields, num_output_fields; 723 CeedEvalMode eval_mode; 724 CeedScalar *assembled_array; 725 CeedQFunctionField *qf_input_fields, *qf_output_fields; 726 CeedQFunction_Hip_gen *qf_data; 727 CeedQFunction qf; 728 CeedOperatorField *op_input_fields, *op_output_fields; 729 730 CeedCallBackend(CeedGetData(ceed, &Hip_data)); 731 CeedCallBackend(CeedOperatorGetQFunction(op, &qf)); 732 CeedCallBackend(CeedQFunctionGetData(qf, &qf_data)); 733 CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem)); 734 CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields)); 735 CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields)); 736 CeedDebug(ceed, "Running single operator assemble for /gpu/hip/gen\n"); 737 738 // Input vectors 739 for (CeedInt i = 0; i < num_input_fields; i++) { 740 CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); 741 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 742 data->fields.inputs[i] = NULL; 743 } else { 744 bool is_active; 745 CeedVector vec; 746 747 // Get input vector 748 CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 749 is_active = vec == CEED_VECTOR_ACTIVE; 750 if (is_active) data->fields.inputs[i] = NULL; 751 else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i])); 752 CeedCallBackend(CeedVectorDestroy(&vec)); 753 } 754 } 755 756 // Point coordinates 757 { 758 CeedVector vec; 759 760 CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec)); 761 
CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords)); 762 CeedCallBackend(CeedVectorDestroy(&vec)); 763 764 // Points per elem 765 if (num_elem != data->points.num_elem) { 766 CeedInt *points_per_elem; 767 const CeedInt num_bytes = num_elem * sizeof(CeedInt); 768 CeedElemRestriction rstr_points = NULL; 769 770 data->points.num_elem = num_elem; 771 CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL)); 772 CeedCallBackend(CeedCalloc(num_elem, &points_per_elem)); 773 for (CeedInt e = 0; e < num_elem; e++) { 774 CeedInt num_points_elem; 775 776 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem)); 777 points_per_elem[e] = num_points_elem; 778 } 779 if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem)); 780 CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes)); 781 CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice)); 782 CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points)); 783 CeedCallBackend(CeedFree(&points_per_elem)); 784 } 785 } 786 787 // Get context data 788 CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c)); 789 790 // Assembly array 791 CeedCallBackend(CeedVectorGetArray(assembled, CEED_MEM_DEVICE, &assembled_array)); 792 CeedScalar *assembled_offset_array = &assembled_array[offset]; 793 794 // Assemble diagonal 795 void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, 796 &data->G, &data->W, &data->points, &assembled_offset_array}; 797 798 CeedInt block_sizes[3] = {data->thread_1d, (data->dim == 1 ? 
1 : data->thread_1d), -1}; 799 800 CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes)); 801 block_sizes[2] = 1; 802 if (data->dim == 1) { 803 CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0); 804 CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar); 805 806 CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_full, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, 807 &is_run_good, opargs)); 808 } else if (data->dim == 2) { 809 CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0); 810 CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar); 811 812 CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_full, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, 813 &is_run_good, opargs)); 814 } else if (data->dim == 3) { 815 CeedInt grid = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 
1 : 0); 816 CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar); 817 818 CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_full, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem, 819 &is_run_good, opargs)); 820 } 821 CeedCallHip(ceed, hipDeviceSynchronize()); 822 823 // Restore input arrays 824 for (CeedInt i = 0; i < num_input_fields; i++) { 825 CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); 826 if (eval_mode == CEED_EVAL_WEIGHT) { // Skip 827 } else { 828 bool is_active; 829 CeedVector vec; 830 831 CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); 832 is_active = vec == CEED_VECTOR_ACTIVE; 833 if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i])); 834 CeedCallBackend(CeedVectorDestroy(&vec)); 835 } 836 } 837 838 // Restore point coordinates 839 { 840 CeedVector vec; 841 842 CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec)); 843 CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords)); 844 CeedCallBackend(CeedVectorDestroy(&vec)); 845 } 846 847 // Restore context data 848 CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c)); 849 850 // Restore assembly array 851 CeedCallBackend(CeedVectorRestoreArray(assembled, &assembled_array)); 852 853 // Cleanup 854 CeedCallBackend(CeedQFunctionDestroy(&qf)); 855 if (!is_run_good) { 856 CeedDebug(ceed, "Single Operator Assemble at Points run failed, using fallback\n"); 857 data->use_assembly_fallback = true; 858 } 859 } 860 CeedCallBackend(CeedDestroy(&ceed)); 861 862 // Fallback, if needed 863 if (data->use_assembly_fallback) { 864 CeedOperator op_fallback; 865 866 CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref CeedOperator for AtPoints SingleOperatorAssemble\n"); 867 CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback)); 868 CeedCallBackend(CeedOperatorAssembleSingle(op_fallback, 
offset, assembled)); 869 return CEED_ERROR_SUCCESS; 870 } 871 return CEED_ERROR_SUCCESS; 872 } 873 874 //------------------------------------------------------------------------------ 875 // Create operator 876 //------------------------------------------------------------------------------ 877 int CeedOperatorCreate_Hip_gen(CeedOperator op) { 878 bool is_composite, is_at_points; 879 Ceed ceed; 880 CeedOperator_Hip_gen *impl; 881 882 CeedCallBackend(CeedOperatorGetCeed(op, &ceed)); 883 CeedCallBackend(CeedCalloc(1, &impl)); 884 CeedCallBackend(CeedOperatorSetData(op, impl)); 885 CeedCall(CeedOperatorIsComposite(op, &is_composite)); 886 if (is_composite) { 887 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAddComposite", CeedOperatorApplyAddComposite_Hip_gen)); 888 } else { 889 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Hip_gen)); 890 } 891 CeedCall(CeedOperatorIsAtPoints(op, &is_at_points)); 892 if (is_at_points) { 893 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen)); 894 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleSingle", CeedOperatorAssembleSingleAtPoints_Hip_gen)); 895 } 896 if (!is_at_points) { 897 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunction_Hip_gen)); 898 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", CeedOperatorLinearAssembleQFunctionUpdate_Hip_gen)); 899 } 900 CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Hip_gen)); 901 CeedCallBackend(CeedDestroy(&ceed)); 902 return CEED_ERROR_SUCCESS; 903 } 904 905 //------------------------------------------------------------------------------ 906