1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed/jit-source/hip/hip-types.h>
11 #include <stddef.h>
12 #include <hip/hiprtc.h>
13
14 #include "../hip/ceed-hip-common.h"
15 #include "../hip/ceed-hip-compile.h"
16 #include "ceed-hip-gen-operator-build.h"
17 #include "ceed-hip-gen.h"
18
19 //------------------------------------------------------------------------------
20 // Destroy operator
21 //------------------------------------------------------------------------------
// Destroy the backend data for a (possibly composite) gen operator.
//
// Releases per-suboperator HIP streams (composite case), unloads all JIT-compiled
// modules, and frees the device array holding points-per-element counts before
// freeing the backend data struct itself.
static int CeedOperatorDestroy_Hip_gen(CeedOperator op) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *impl;
  bool                  is_composite;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &impl));
  CeedCallBackend(CeedOperatorIsComposite(op, &is_composite));
  if (is_composite) {
    CeedInt num_suboperators;

    // Note: use CeedCallBackend (not CeedCall) for consistency with the rest of this backend
    CeedCallBackend(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
    for (CeedInt i = 0; i < num_suboperators; i++) {
      // Streams are created lazily in composite ApplyAdd; only destroy the ones that exist
      if (impl->streams[i]) CeedCallHip(ceed, hipStreamDestroy(impl->streams[i]));
      impl->streams[i] = NULL;
    }
  }
  // Unload each JIT module that was built (apply, full/diagonal assembly, QFunction assembly)
  if (impl->module) CeedCallHip(ceed, hipModuleUnload(impl->module));
  if (impl->module_assemble_full) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_full));
  if (impl->module_assemble_diagonal) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_diagonal));
  if (impl->module_assemble_qfunction) CeedCallHip(ceed, hipModuleUnload(impl->module_assemble_qfunction));
  // Device-side points-per-element array allocated for AtPoints operators
  if (impl->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)impl->points.num_per_elem));
  CeedCallBackend(CeedFree(&impl));
  CeedCallBackend(CeedDestroy(&ceed));
  return CEED_ERROR_SUCCESS;
}
48
49 //------------------------------------------------------------------------------
50 // Apply and add to output
51 //------------------------------------------------------------------------------
// Core routine shared by the standalone and composite ApplyAdd paths.
//
// Builds the fused operator kernel on first use, collects device pointers for
// every field vector, launches the kernel on `stream` (NULL => default stream),
// then restores all arrays. If the kernel build fails, returns early with
// *is_run_good == false; if the launch fails, additionally marks the operator to
// use the reference fallback on subsequent applications.
// Note: `request` is currently unused here; it is kept for interface symmetry
// with the public ApplyAdd entry points.
static int CeedOperatorApplyAddCore_Hip_gen(CeedOperator op, hipStream_t stream, const CeedScalar *input_arr, CeedScalar *output_arr,
                                            bool *is_run_good, CeedRequest *request) {
  bool                   is_at_points, is_tensor;
  Ceed                   ceed;
  CeedInt                num_elem, num_input_fields, num_output_fields;
  CeedEvalMode           eval_mode;
  CeedQFunctionField    *qf_input_fields, *qf_output_fields;
  CeedQFunction_Hip_gen *qf_data;
  CeedQFunction          qf;
  CeedOperatorField     *op_input_fields, *op_output_fields;
  CeedOperator_Hip_gen  *data;

  // Creation of the operator (JIT build; early out if the build did not succeed)
  CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, is_run_good));
  if (!(*is_run_good)) return CEED_ERROR_SUCCESS;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));
  CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
  CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
  CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
  CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
  CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

  // Input vectors: stash a device pointer for each field in the kernel argument struct
  for (CeedInt i = 0; i < num_input_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      data->fields.inputs[i] = NULL;
    } else {
      bool       is_active;
      CeedVector vec;

      // Get input vector; the active input uses the caller-provided device array
      CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (is_active) data->fields.inputs[i] = input_arr;
      else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Output vectors: same pattern, but writable arrays for passive outputs
  for (CeedInt i = 0; i < num_output_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      data->fields.outputs[i] = NULL;
    } else {
      bool       is_active;
      CeedVector vec;

      // Get output vector; the active output uses the caller-provided device array
      CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (is_active) data->fields.outputs[i] = output_arr;
      else CeedCallBackend(CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.outputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Point coordinates, if needed (AtPoints operators only)
  CeedCallBackend(CeedOperatorIsAtPoints(op, &is_at_points));
  if (is_at_points) {
    // Coords
    CeedVector vec;

    CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
    CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
    CeedCallBackend(CeedVectorDestroy(&vec));

    // Points per elem: (re)build the cached device array when the element count changed
    if (num_elem != data->points.num_elem) {
      CeedInt            *points_per_elem;
      const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
      CeedElemRestriction rstr_points = NULL;

      data->points.num_elem = num_elem;
      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
      // Gather per-element point counts on the host, then upload in one copy
      CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
      for (CeedInt e = 0; e < num_elem; e++) {
        CeedInt num_points_elem;

        CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
        points_per_elem[e] = num_points_elem;
      }
      // Replace any stale device array from a previous (different-sized) apply
      if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
      CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
      CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
      CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
      CeedCallBackend(CeedFree(&points_per_elem));
    }
  }

  // Get context data (QFunction user context on the device)
  CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

  // Apply operator: argument order must match the generated kernel's signature
  void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points};

  CeedCallBackend(CeedOperatorHasTensorBases(op, &is_tensor));
  // Thread block: thread_1d x thread_1d (tensor bases, dim > 1) x elements-per-block (set below)
  CeedInt block_sizes[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1};

  if (is_tensor) {
    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
  } else {
    // Non-tensor: cap the block at 256 threads while targeting 64 elements per block
    CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64;

    elems_per_block = elems_per_block > 0 ? elems_per_block : 1;
    block_sizes[2]  = elems_per_block;
  }
  if (data->dim == 1 || !is_tensor) {
    // grid = ceil(num_elem / elems_per_block); shared memory is one scalar per thread
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                  is_run_good, opargs));
  } else if (data->dim == 2) {
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                  is_run_good, opargs));
  } else if (data->dim == 3) {
    // 3D uses a thread_1d^2 slab of shared memory per element, same as 2D
    CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
    CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

    CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->op, stream, grid, block_sizes[0], block_sizes[1], block_sizes[2], sharedMem,
                                                  is_run_good, opargs));
  }

  // Restore input arrays
  for (CeedInt i = 0; i < num_input_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
    } else {
      bool       is_active;
      CeedVector vec;

      CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      // Active array is owned by the caller; only restore passive vectors
      if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Restore output arrays
  for (CeedInt i = 0; i < num_output_fields; i++) {
    CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode));
    if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
    } else {
      bool       is_active;
      CeedVector vec;

      CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
      is_active = vec == CEED_VECTOR_ACTIVE;
      if (!is_active) CeedCallBackend(CeedVectorRestoreArray(vec, &data->fields.outputs[i]));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }
  }

  // Restore point coordinates, if needed
  if (is_at_points) {
    CeedVector vec;

    CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
    CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
    CeedCallBackend(CeedVectorDestroy(&vec));
  }

  // Restore context data
  CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

  // Cleanup
  CeedCallBackend(CeedDestroy(&ceed));
  CeedCallBackend(CeedQFunctionDestroy(&qf));
  // A failed launch flags this operator to use the reference fallback next time
  if (!(*is_run_good)) data->use_fallback = true;
  return CEED_ERROR_SUCCESS;
}
230
// ApplyAdd for a standalone (non-composite) operator.
//
// Acquires device arrays for the active input/output vectors, attempts the fused
// gen kernel on the default stream, and falls back to the /gpu/hip/ref operator
// when the kernel build or launch was unsuccessful.
static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
  bool              run_succeeded = false;
  const CeedScalar *d_input       = NULL;
  CeedScalar       *d_output      = NULL;
  const bool        has_input     = input_vec != CEED_VECTOR_NONE;
  const bool        has_output    = output_vec != CEED_VECTOR_NONE;

  // Try to run the fused kernel (NULL stream => default stream)
  if (has_input) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &d_input));
  if (has_output) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &d_output));
  CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(op, NULL, d_input, d_output, &run_succeeded, request));
  if (has_input) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &d_input));
  if (has_output) CeedCallBackend(CeedVectorRestoreArray(output_vec, &d_output));

  if (run_succeeded) return CEED_ERROR_SUCCESS;

  // Unsuccessful run -- delegate to the reference fallback operator
  {
    CeedOperator op_fallback;

    CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref CeedOperator for ApplyAdd\n");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request));
  }
  return CEED_ERROR_SUCCESS;
}
253
// ApplyAdd for a composite operator.
//
// Runs each suboperator through the shared core, either all on one stream
// (sequential mode) or each on its own lazily-created stream (concurrent mode),
// then synchronizes and falls back per-suboperator for any that failed.
static int CeedOperatorApplyAddComposite_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
  bool                  is_run_good[CEED_COMPOSITE_MAX] = {false}, is_sequential;
  CeedInt               num_suboperators;
  const CeedScalar     *input_arr  = NULL;
  CeedScalar           *output_arr = NULL;
  Ceed                  ceed;
  CeedOperator_Hip_gen *impl;
  CeedOperator         *sub_operators;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &impl));
  CeedCallBackend(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
  CeedCallBackend(CeedOperatorCompositeGetSubList(op, &sub_operators));
  CeedCall(CeedOperatorCompositeIsSequential(op, &is_sequential));
  if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr));
  if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr));
  for (CeedInt i = 0; i < num_suboperators; i++) {
    CeedInt       num_elem = 0;
    // Sequential mode funnels every suboperator through stream 0; otherwise one stream per suboperator
    const CeedInt stream_index = is_sequential ? 0 : i;

    CeedCallBackend(CeedOperatorGetNumElements(sub_operators[i], &num_elem));
    if (num_elem > 0) {
      // Streams are created lazily on first use and destroyed in CeedOperatorDestroy_Hip_gen
      if (!impl->streams[stream_index]) CeedCallHip(ceed, hipStreamCreate(&impl->streams[stream_index]));
      CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(sub_operators[i], impl->streams[stream_index], input_arr, output_arr, &is_run_good[i],
                                                       request));
    } else {
      // Nothing to do for an empty suboperator; count it as successful
      is_run_good[i] = true;
    }
  }
  // NOTE(review): if every suboperator was empty, streams[0] is still NULL here and
  // hipStreamSynchronize(NULL) syncs the default stream -- confirm this is intended
  if (is_sequential) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[0]));
  else {
    for (CeedInt i = 0; i < num_suboperators; i++) {
      if (impl->streams[i]) {
        if (is_run_good[i]) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[i]));
      }
    }
  }
  if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr));
  if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArray(output_vec, &output_arr));
  CeedCallHip(ceed, hipDeviceSynchronize());

  // Fallback on unsuccessful run, per failed suboperator
  for (CeedInt i = 0; i < num_suboperators; i++) {
    if (!is_run_good[i]) {
      CeedOperator op_fallback;

      CeedDebug(ceed, "\nFalling back to /gpu/hip/ref CeedOperator for ApplyAdd\n");
      CeedCallBackend(CeedOperatorGetFallback(sub_operators[i], &op_fallback));
      CeedCallBackend(CeedOperatorApplyAdd(op_fallback, input_vec, output_vec, request));
    }
  }
  CeedCallBackend(CeedDestroy(&ceed));
  return CEED_ERROR_SUCCESS;
}
308
309 //------------------------------------------------------------------------------
310 // QFunction assembly
311 //------------------------------------------------------------------------------
// Core for QFunction assembly: build-or-update the assembled QFunction data.
//
// When `build_objects` is true, also creates the output element restriction and
// the assembled vector. Builds the assembly kernel on first use; on any build or
// launch failure, flags the operator for the reference fallback and delegates to
// it. Fix: corrected the typo "LineearAssembleQFunction" in the fallback debug
// message.
static int CeedOperatorLinearAssembleQFunctionCore_Hip_gen(CeedOperator op, bool build_objects, CeedVector *assembled, CeedElemRestriction *rstr,
                                                           CeedRequest *request) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *data;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));

  // Build the assembly kernel (once); a failed build routes all future calls to the fallback
  if (!data->assemble_qfunction && !data->use_assembly_fallback) {
    bool is_build_good = false;

    CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good));
    if (is_build_good) CeedCallBackend(CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen(op, &is_build_good));
    if (!is_build_good) data->use_assembly_fallback = true;
  }

  // Try assembly
  if (!data->use_assembly_fallback) {
    bool                   is_run_good = true;
    Ceed_Hip              *hip_data;
    CeedInt                num_elem, num_input_fields, num_output_fields;
    CeedEvalMode           eval_mode;
    CeedScalar            *assembled_array;
    CeedQFunctionField    *qf_input_fields, *qf_output_fields;
    CeedQFunction_Hip_gen *qf_data;
    CeedQFunction          qf;
    CeedOperatorField     *op_input_fields, *op_output_fields;

    CeedCallBackend(CeedGetData(ceed, &hip_data));
    CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
    CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
    CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
    CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
    CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

    // Input vectors: active inputs are NULL here -- the assembly kernel supplies unit inputs
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
        data->fields.inputs[i] = NULL;
      } else {
        bool       is_active;
        CeedVector vec;

        // Get input vector
        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (is_active) data->fields.inputs[i] = NULL;
        else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Get context data
    CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

    // Build objects if needed (first assembly only)
    if (build_objects) {
      CeedInt qf_size_in = 0, qf_size_out = 0, Q;

      // Count number of active input fields
      {
        for (CeedInt i = 0; i < num_input_fields; i++) {
          CeedInt    field_size;
          CeedVector vec;

          // Get input vector
          CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
          // Check if active input
          if (vec == CEED_VECTOR_ACTIVE) {
            CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &field_size));
            qf_size_in += field_size;
          }
          CeedCallBackend(CeedVectorDestroy(&vec));
        }
        CeedCheck(qf_size_in > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
      }

      // Count number of active output fields
      {
        for (CeedInt i = 0; i < num_output_fields; i++) {
          CeedInt    field_size;
          CeedVector vec;

          // Get output vector
          CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
          // Check if active output
          if (vec == CEED_VECTOR_ACTIVE) {
            CeedCallBackend(CeedQFunctionFieldGetSize(qf_output_fields[i], &field_size));
            qf_size_out += field_size;
          }
          CeedCallBackend(CeedVectorDestroy(&vec));
        }
        CeedCheck(qf_size_out > 0, ceed, CEED_ERROR_BACKEND, "Cannot assemble QFunction without active inputs and outputs");
      }
      CeedCallBackend(CeedOperatorGetNumQuadraturePoints(op, &Q));

      // Actually build objects now
      const CeedSize l_size     = (CeedSize)num_elem * Q * qf_size_in * qf_size_out;
      CeedInt        strides[3] = {1, num_elem * Q, Q}; /* *NOPAD* */

      // Create output restriction
      CeedCallBackend(CeedElemRestrictionCreateStrided(ceed, num_elem, Q, qf_size_in * qf_size_out,
                                                       (CeedSize)qf_size_in * (CeedSize)qf_size_out * (CeedSize)num_elem * (CeedSize)Q, strides,
                                                       rstr));
      // Create assembled vector
      CeedCallBackend(CeedVectorCreate(ceed, l_size, assembled));
    }

    // Assembly array
    CeedCallBackend(CeedVectorGetArrayWrite(*assembled, CEED_MEM_DEVICE, &assembled_array));

    // Assemble QFunction: argument order must match the generated kernel's signature
    bool  is_tensor = false;
    void *opargs[]  = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points, &assembled_array};

    CeedCallBackend(CeedOperatorHasTensorBases(op, &is_tensor));
    // Thread block: thread_1d x thread_1d (tensor bases, dim > 1) x elements-per-block (set below)
    CeedInt block_sizes[3] = {data->thread_1d, ((!is_tensor || data->dim == 1) ? 1 : data->thread_1d), -1};

    if (is_tensor) {
      CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    } else {
      // Non-tensor: cap the block at 256 threads while targeting 64 elements per block
      CeedInt elems_per_block = 64 * data->thread_1d > 256 ? 256 / data->thread_1d : 64;

      elems_per_block = elems_per_block > 0 ? elems_per_block : 1;
      block_sizes[2]  = elems_per_block;
    }
    if (data->dim == 1 || !is_tensor) {
      // grid = ceil(num_elem / elems_per_block); shared memory is one scalar per thread
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 2) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 3) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_qfunction, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    }

    // Restore input arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      } else {
        bool       is_active;
        CeedVector vec;

        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Restore context data
    CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

    // Restore assembly array
    CeedCallBackend(CeedVectorRestoreArray(*assembled, &assembled_array));

    // Cleanup: on failure, tear down any objects built this call so the fallback rebuilds them
    CeedCallBackend(CeedQFunctionDestroy(&qf));
    if (!is_run_good) {
      data->use_assembly_fallback = true;
      if (build_objects) {
        CeedCallBackend(CeedVectorDestroy(assembled));
        CeedCallBackend(CeedElemRestrictionDestroy(rstr));
      }
    }
  }
  CeedCallBackend(CeedDestroy(&ceed));

  // Fallback, if needed
  if (data->use_assembly_fallback) {
    CeedOperator op_fallback;

    // Fixed typo: "Lineear" -> "Linear"
    CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref CeedOperator for LinearAssembleQFunction\n");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(op_fallback, assembled, rstr, request));
    return CEED_ERROR_SUCCESS;
  }
  return CEED_ERROR_SUCCESS;
}
504
// Assemble QFunction data, creating the assembled vector and restriction (build path)
static int CeedOperatorLinearAssembleQFunction_Hip_gen(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  const bool build_objects = true;

  CeedCallBackend(CeedOperatorLinearAssembleQFunctionCore_Hip_gen(op, build_objects, assembled, rstr, request));
  return CEED_ERROR_SUCCESS;
}
508
// Update previously assembled QFunction data in place (no object creation)
static int CeedOperatorLinearAssembleQFunctionUpdate_Hip_gen(CeedOperator op, CeedVector assembled, CeedElemRestriction rstr, CeedRequest *request) {
  const bool build_objects = false;

  CeedCallBackend(CeedOperatorLinearAssembleQFunctionCore_Hip_gen(op, build_objects, &assembled, &rstr, request));
  return CEED_ERROR_SUCCESS;
}
512
513 //------------------------------------------------------------------------------
514 // AtPoints diagonal assembly
515 //------------------------------------------------------------------------------
// Add the diagonal of an AtPoints operator into `assembled`.
//
// Builds the diagonal-assembly kernel on first use (only when the numbers of
// active input and output bases match), stages field arrays and point data on
// the device, launches the kernel, and restores everything. Any build or launch
// failure flags the operator for the reference fallback, which is then invoked.
static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen(CeedOperator op, CeedVector assembled, CeedRequest *request) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *data;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));

  // Build the assembly kernel (once)
  if (!data->assemble_diagonal && !data->use_assembly_fallback) {
    bool                      is_build_good = false;
    CeedInt                   num_active_bases_in, num_active_bases_out;
    CeedOperatorAssemblyData  assembly_data;

    CeedCallBackend(CeedOperatorGetOperatorAssemblyData(op, &assembly_data));
    CeedCallBackend(CeedOperatorAssemblyDataGetEvalModes(assembly_data, &num_active_bases_in, NULL, NULL, NULL, &num_active_bases_out, NULL, NULL,
                                                         NULL, NULL));
    // Kernel generation only supports matching input/output active basis counts
    if (num_active_bases_in == num_active_bases_out) {
      CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good));
      if (is_build_good) CeedCallBackend(CeedOperatorBuildKernelDiagonalAssemblyAtPoints_Hip_gen(op, &is_build_good));
    }
    if (!is_build_good) data->use_assembly_fallback = true;
  }

  // Try assembly
  if (!data->use_assembly_fallback) {
    bool                   is_run_good = true;
    Ceed_Hip              *hip_data;
    CeedInt                num_elem, num_input_fields, num_output_fields;
    CeedEvalMode           eval_mode;
    CeedScalar            *assembled_array;
    CeedQFunctionField    *qf_input_fields, *qf_output_fields;
    CeedQFunction_Hip_gen *qf_data;
    CeedQFunction          qf;
    CeedOperatorField     *op_input_fields, *op_output_fields;

    CeedCallBackend(CeedGetData(ceed, &hip_data));
    CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
    CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
    CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
    CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
    CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));

    // Input vectors: active inputs are NULL here -- the assembly kernel supplies unit inputs
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
        data->fields.inputs[i] = NULL;
      } else {
        bool       is_active;
        CeedVector vec;

        // Get input vector
        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (is_active) data->fields.inputs[i] = NULL;
        else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));

      // Points per elem: (re)build the cached device array when the element count changed
      if (num_elem != data->points.num_elem) {
        CeedInt            *points_per_elem;
        const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
        CeedElemRestriction rstr_points = NULL;

        data->points.num_elem = num_elem;
        CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
        // Gather per-element point counts on the host, then upload in one copy
        CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
        for (CeedInt e = 0; e < num_elem; e++) {
          CeedInt num_points_elem;

          CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
          points_per_elem[e] = num_points_elem;
        }
        if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
        CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
        CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
        CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
        CeedCallBackend(CeedFree(&points_per_elem));
      }
    }

    // Get context data
    CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

    // Assembly array (read-write: diagonal is added into the existing values)
    CeedCallBackend(CeedVectorGetArray(assembled, CEED_MEM_DEVICE, &assembled_array));

    // Assemble diagonal: argument order must match the generated kernel's signature
    void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B, &data->G, &data->W, &data->points, &assembled_array};

    CeedInt block_sizes[3] = {data->thread_1d, (data->dim == 1 ? 1 : data->thread_1d), -1};

    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    // Force one element per block for diagonal assembly
    block_sizes[2] = 1;
    if (data->dim == 1) {
      // grid = ceil(num_elem / elems_per_block); shared memory is one scalar per thread
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 2) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    } else if (data->dim == 3) {
      CeedInt grid      = num_elem / block_sizes[2] + ((num_elem / block_sizes[2] * block_sizes[2] < num_elem) ? 1 : 0);
      CeedInt sharedMem = block_sizes[2] * data->thread_1d * data->thread_1d * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_diagonal, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2],
                                                    sharedMem, &is_run_good, opargs));
    }
    // Wait for the kernel before restoring arrays and reporting status
    CeedCallHip(ceed, hipDeviceSynchronize());

    // Restore input arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      } else {
        bool       is_active;
        CeedVector vec;

        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Restore point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }

    // Restore context data
    CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

    // Restore assembly array
    CeedCallBackend(CeedVectorRestoreArray(assembled, &assembled_array));

    // Cleanup
    CeedCallBackend(CeedQFunctionDestroy(&qf));
    if (!is_run_good) data->use_assembly_fallback = true;
  }
  CeedCallBackend(CeedDestroy(&ceed));

  // Fallback, if needed
  if (data->use_assembly_fallback) {
    CeedOperator op_fallback;

    CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref CeedOperator for AtPoints LinearAssembleAddDiagonal\n");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
    return CEED_ERROR_SUCCESS;
  }
  return CEED_ERROR_SUCCESS;
}
688
689 //------------------------------------------------------------------------------
690 // AtPoints full assembly
691 //------------------------------------------------------------------------------
static int CeedOperatorAssembleSingleAtPoints_Hip_gen(CeedOperator op, CeedInt offset, CeedVector assembled) {
  Ceed                  ceed;
  CeedOperator_Hip_gen *data;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedOperatorGetData(op, &data));

  // Build the assembly kernel, if not already built and the fallback is not in use
  if (!data->assemble_full && !data->use_assembly_fallback) {
    bool                     is_build_good = false;
    CeedInt                  num_active_bases_in, num_active_bases_out;
    CeedOperatorAssemblyData assembly_data;

    CeedCallBackend(CeedOperatorGetOperatorAssemblyData(op, &assembly_data));
    CeedCallBackend(CeedOperatorAssemblyDataGetEvalModes(assembly_data, &num_active_bases_in, NULL, NULL, NULL, &num_active_bases_out, NULL, NULL,
                                                         NULL, NULL));
    // The generated single-kernel assembly only supports matching numbers of active input and output bases
    if (num_active_bases_in == num_active_bases_out) {
      CeedCallBackend(CeedOperatorBuildKernel_Hip_gen(op, &is_build_good));
      if (is_build_good) CeedCallBackend(CeedOperatorBuildKernelFullAssemblyAtPoints_Hip_gen(op, &is_build_good));
    }
    if (!is_build_good) {
      CeedDebug(ceed, "Single Operator Assemble at Points compile failed, using fallback\n");
      data->use_assembly_fallback = true;
    }
  }

  // Try assembly with the generated kernel
  if (!data->use_assembly_fallback) {
    bool                   is_run_good = true;
    CeedInt                num_elem, num_input_fields, num_output_fields;
    CeedEvalMode           eval_mode;
    CeedScalar            *assembled_array;
    CeedQFunctionField    *qf_input_fields, *qf_output_fields;
    CeedQFunction_Hip_gen *qf_data;
    CeedQFunction          qf;
    CeedOperatorField     *op_input_fields, *op_output_fields;

    CeedCallBackend(CeedOperatorGetQFunction(op, &qf));
    CeedCallBackend(CeedQFunctionGetData(qf, &qf_data));
    CeedCallBackend(CeedOperatorGetNumElements(op, &num_elem));
    CeedCallBackend(CeedOperatorGetFields(op, &num_input_fields, &op_input_fields, &num_output_fields, &op_output_fields));
    CeedCallBackend(CeedQFunctionGetFields(qf, NULL, &qf_input_fields, NULL, &qf_output_fields));
    CeedDebug(ceed, "Running single operator assemble for /gpu/hip/gen\n");

    // Input vectors - get read-only device arrays for all passive inputs
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
        data->fields.inputs[i] = NULL;
      } else {
        bool       is_active;
        CeedVector vec;

        // Get input vector; the active input is supplied by the kernel itself
        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (is_active) data->fields.inputs[i] = NULL;
        else CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));

      // Points per elem - (re)upload the per-element point counts when the element count changed
      if (num_elem != data->points.num_elem) {
        CeedInt            *points_per_elem;
        const CeedInt       num_bytes   = num_elem * sizeof(CeedInt);
        CeedElemRestriction rstr_points = NULL;

        data->points.num_elem = num_elem;
        CeedCallBackend(CeedOperatorAtPointsGetPoints(op, &rstr_points, NULL));
        CeedCallBackend(CeedCalloc(num_elem, &points_per_elem));
        for (CeedInt e = 0; e < num_elem; e++) {
          CeedInt num_points_elem;

          CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr_points, e, &num_points_elem));
          points_per_elem[e] = num_points_elem;
        }
        // Replace any stale device buffer before copying the host counts over
        if (data->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)data->points.num_per_elem));
        CeedCallHip(ceed, hipMalloc((void **)&data->points.num_per_elem, num_bytes));
        CeedCallHip(ceed, hipMemcpy((void *)data->points.num_per_elem, points_per_elem, num_bytes, hipMemcpyHostToDevice));
        CeedCallBackend(CeedElemRestrictionDestroy(&rstr_points));
        CeedCallBackend(CeedFree(&points_per_elem));
      }
    }

    // Get context data
    CeedCallBackend(CeedQFunctionGetInnerContextData(qf, CEED_MEM_DEVICE, &qf_data->d_c));

    // Assembly array - kernel writes at the caller-provided offset
    CeedCallBackend(CeedVectorGetArray(assembled, CEED_MEM_DEVICE, &assembled_array));
    CeedScalar *assembled_offset_array = &assembled_array[offset];

    // Assemble full operator
    void *opargs[] = {(void *)&num_elem, &qf_data->d_c, &data->indices, &data->fields, &data->B,
                      &data->G, &data->W, &data->points, &assembled_offset_array};

    CeedInt block_sizes[3] = {data->thread_1d, (data->dim == 1 ? 1 : data->thread_1d), -1};

    CeedCallBackend(BlockGridCalculate_Hip_gen(data->dim, num_elem, data->max_P_1d, data->Q_1d, block_sizes));
    // One element per thread block for full assembly
    block_sizes[2] = 1;
    if (data->dim >= 1 && data->dim <= 3) {
      const CeedInt elem_per_block = block_sizes[2];
      CeedInt       grid           = num_elem / elem_per_block + ((num_elem / elem_per_block * elem_per_block < num_elem) ? 1 : 0);
      // Shared memory holds thread_1d values per element for 1D, thread_1d^2 for 2D/3D
      CeedInt       shared_mem     = elem_per_block * data->thread_1d * (data->dim == 1 ? 1 : data->thread_1d) * sizeof(CeedScalar);

      CeedCallBackend(CeedTryRunKernelDimShared_Hip(ceed, data->assemble_full, NULL, grid, block_sizes[0], block_sizes[1], block_sizes[2], shared_mem,
                                                    &is_run_good, opargs));
    }
    CeedCallHip(ceed, hipDeviceSynchronize());

    // Restore input arrays
    for (CeedInt i = 0; i < num_input_fields; i++) {
      CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
      if (eval_mode == CEED_EVAL_WEIGHT) {  // Skip
      } else {
        bool       is_active;
        CeedVector vec;

        CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec));
        is_active = vec == CEED_VECTOR_ACTIVE;
        if (!is_active) CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->fields.inputs[i]));
        CeedCallBackend(CeedVectorDestroy(&vec));
      }
    }

    // Restore point coordinates
    {
      CeedVector vec;

      CeedCallBackend(CeedOperatorAtPointsGetPoints(op, NULL, &vec));
      CeedCallBackend(CeedVectorRestoreArrayRead(vec, &data->points.coords));
      CeedCallBackend(CeedVectorDestroy(&vec));
    }

    // Restore context data
    CeedCallBackend(CeedQFunctionRestoreInnerContextData(qf, &qf_data->d_c));

    // Restore assembly array
    CeedCallBackend(CeedVectorRestoreArray(assembled, &assembled_array));

    // Cleanup
    CeedCallBackend(CeedQFunctionDestroy(&qf));
    if (!is_run_good) {
      CeedDebug(ceed, "Single Operator Assemble at Points run failed, using fallback\n");
      data->use_assembly_fallback = true;
    }
  }
  CeedCallBackend(CeedDestroy(&ceed));

  // Fallback, if needed
  if (data->use_assembly_fallback) {
    CeedOperator op_fallback;

    CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back to /gpu/hip/ref CeedOperator for AtPoints SingleOperatorAssemble\n");
    CeedCallBackend(CeedOperatorGetFallback(op, &op_fallback));
    CeedCallBackend(CeedOperatorAssembleSingle(op_fallback, offset, assembled));
    return CEED_ERROR_SUCCESS;
  }
  return CEED_ERROR_SUCCESS;
}
873
874 //------------------------------------------------------------------------------
875 // Create operator
876 //------------------------------------------------------------------------------
int CeedOperatorCreate_Hip_gen(CeedOperator op) {
  bool                  is_composite, is_at_points;
  Ceed                  ceed;
  CeedOperator_Hip_gen *impl;

  CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
  CeedCallBackend(CeedCalloc(1, &impl));
  CeedCallBackend(CeedOperatorSetData(op, impl));
  // Note: use CeedCallBackend consistently for backend error handling
  CeedCallBackend(CeedOperatorIsComposite(op, &is_composite));
  if (is_composite) {
    CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAddComposite", CeedOperatorApplyAddComposite_Hip_gen));
  } else {
    CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", CeedOperatorApplyAdd_Hip_gen));
  }
  CeedCallBackend(CeedOperatorIsAtPoints(op, &is_at_points));
  if (is_at_points) {
    // AtPoints operators get the dedicated AtPoints assembly entry points
    CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal", CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip_gen));
    CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleSingle", CeedOperatorAssembleSingleAtPoints_Hip_gen));
  } else {
    // Standard operators support QFunction assembly instead
    CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction", CeedOperatorLinearAssembleQFunction_Hip_gen));
    CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunctionUpdate", CeedOperatorLinearAssembleQFunctionUpdate_Hip_gen));
  }
  CeedCallBackend(CeedSetBackendFunction(ceed, "Operator", op, "Destroy", CeedOperatorDestroy_Hip_gen));
  CeedCallBackend(CeedDestroy(&ceed));
  return CEED_ERROR_SUCCESS;
}
904
905 //------------------------------------------------------------------------------
906