Lines Matching refs:code

106   std::ostringstream code;  in CeedOperatorBuildKernel_Sycl_gen()  local
115 code << tensor_basis_code; in CeedOperatorBuildKernel_Sycl_gen()
126 code << sycl_gen_template_source; in CeedOperatorBuildKernel_Sycl_gen()
219 code << "\n#undef CEED_Q_VLA\n"; in CeedOperatorBuildKernel_Sycl_gen()
221 code << "#define CEED_Q_VLA 1\n\n"; in CeedOperatorBuildKernel_Sycl_gen()
223 code << "#define CEED_Q_VLA " << Q_1d << "\n\n"; in CeedOperatorBuildKernel_Sycl_gen()
236 code << qfunction_source; in CeedOperatorBuildKernel_Sycl_gen()
239 code << "\n// -----------------------------------------------------------------------------\n"; in CeedOperatorBuildKernel_Sycl_gen()
240code << "__attribute__((reqd_work_group_size(GROUP_SIZE_X, GROUP_SIZE_Y, GROUP_SIZE_Z), intel_reqd… in CeedOperatorBuildKernel_Sycl_gen()
241 code << "kernel void " << operator_name << "("; in CeedOperatorBuildKernel_Sycl_gen()
242 code << "const CeedInt num_elem, "; in CeedOperatorBuildKernel_Sycl_gen()
243 code << "global void* ctx, "; in CeedOperatorBuildKernel_Sycl_gen()
244 code << "global const FieldsInt_Sycl* indices, "; in CeedOperatorBuildKernel_Sycl_gen()
245 code << "global Fields_Sycl* fields, "; in CeedOperatorBuildKernel_Sycl_gen()
246 code << "global const Fields_Sycl* B, "; in CeedOperatorBuildKernel_Sycl_gen()
247 code << "global const Fields_Sycl* G, "; in CeedOperatorBuildKernel_Sycl_gen()
248 code << "global const CeedScalar * restrict W"; in CeedOperatorBuildKernel_Sycl_gen()
249 code << ") {\n"; in CeedOperatorBuildKernel_Sycl_gen()
254 code << " global const CeedScalar* d_u_" << i << " = fields->inputs[" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
259 code << " global CeedScalar* d_v_" << i << " = fields->outputs[" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
263 code << " const CeedInt DIM = " << dim << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
264 code << " const CeedInt Q_1D = " << Q_1d << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
267 code << " local CeedScalar scratch[" << scratch_size << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
268code << " local CeedScalar * elem_scratch = scratch + get_local_id(2) * T_1D" << (dim > 1 ? "*T_1… in CeedOperatorBuildKernel_Sycl_gen()
270 code << "\n // -- Input field constants and basis data --\n"; in CeedOperatorBuildKernel_Sycl_gen()
273 code << " // ---- Input field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
286 code << " const CeedInt P_in_" << i << " = " << P_1d << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
288 code << " const CeedInt P_in_" << i << " = " << Q_1d << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
290 code << " const CeedInt num_comp_in_" << i << " = " << num_comp << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
294 code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
301 code << " local CeedScalar s_B_in_" << i << "[" << P_1d * Q_1d << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
302code << " loadMatrix(P_in_" << i << "*Q_1D, B->inputs[" << i << "], s_B_in_" << i << ");\n"; in CeedOperatorBuildKernel_Sycl_gen()
307 code << " local CeedScalar s_B_in_" << i << "[" << P_1d * Q_1d << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
308code << " loadMatrix(P_in_" << i << "*Q_1D, B->inputs[" << i << "], s_B_in_" << i << ");\n"; in CeedOperatorBuildKernel_Sycl_gen()
311 code << " local CeedScalar s_G_in_" << i << "[" << Q_1d * Q_1d << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
312 code << " loadMatrix(Q_1D*Q_1D, G->inputs[" << i << "], s_G_in_" << i << ");\n"; in CeedOperatorBuildKernel_Sycl_gen()
316code << " local CeedScalar s_G_in_" << i << "[" << Q_1d * (has_collo_grad ? Q_1d : P_1d) << "];\n… in CeedOperatorBuildKernel_Sycl_gen()
317code << " loadMatrix(" << (has_collo_grad ? "Q_1D" : ("P_in_" + std::to_string(i))) << "*Q_1D, G-… in CeedOperatorBuildKernel_Sycl_gen()
331 code << "\n // -- Output field constants and basis data --\n"; in CeedOperatorBuildKernel_Sycl_gen()
333 code << " // ---- Output field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
345 code << " const CeedInt P_out_" << i << " = " << P_1d << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
347 code << " const CeedInt P_out_" << i << " = " << Q_1d << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
349 code << " const CeedInt num_comp_out_" << i << " = " << num_comp << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
352 code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
359 code << " local CeedScalar s_B_out_" << i << "[" << P_1d * Q_1d << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
360code << " loadMatrix(P_out_" << i << "*Q_1D, B->outputs[" << i << "], s_B_out_" << i << ");\n"; in CeedOperatorBuildKernel_Sycl_gen()
365 code << " local CeedScalar s_B_out_" << i << "[" << P_1d * Q_1d << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
366code << " loadMatrix(P_out_" << i << "*Q_1D, B->outputs[" << i << "], s_B_out_" << i << ");\n"; in CeedOperatorBuildKernel_Sycl_gen()
369 code << " local CeedScalar s_G_out_" << i << "[" << Q_1d * Q_1d << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
370 code << " loadMatrix(Q_1D*Q_1D, G->outputs[" << i << "], s_G_out_" << i << ");\n"; in CeedOperatorBuildKernel_Sycl_gen()
374code << " local CeedScalar s_G_out_" << i << "[" << Q_1d * (has_collo_grad ? Q_1d : P_1d) << "];\… in CeedOperatorBuildKernel_Sycl_gen()
375code << " loadMatrix(" << (has_collo_grad ? "Q_1D" : ("P_out_" + std::to_string(i))) << "*Q_1D, G… in CeedOperatorBuildKernel_Sycl_gen()
393 code << "\n // -- Element loop --\n"; in CeedOperatorBuildKernel_Sycl_gen()
394 code << " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"; in CeedOperatorBuildKernel_Sycl_gen()
395 code << " {\n"; in CeedOperatorBuildKernel_Sycl_gen()
398 code << " // -- Input field restrictions and basis actions --\n"; in CeedOperatorBuildKernel_Sycl_gen()
400 code << " // ---- Input field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
411 code << " CeedScalar r_u_" << i << "[num_comp_in_" << i << "*P_in_" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
418 code << " const CeedInt l_size_in_" << i << " = " << l_size << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
420 code << " // CompStride: " << comp_stride << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
423code << " readDofsOffset" << dim << "d(num_comp_in_" << i << ", " << comp_stride << ", P_in_" <… in CeedOperatorBuildKernel_Sycl_gen()
436code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; in CeedOperatorBuildKernel_Sycl_gen()
437code << " readDofsStrided" << dim << "d(num_comp_in_" << i << ",P_in_" << i << "," << strides[0… in CeedOperatorBuildKernel_Sycl_gen()
444 code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
448 code << " private CeedScalar* r_t_" << i << " = r_u_" << i << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
452 code << " CeedScalar r_t_" << i << "[num_comp_in_" << i << "*Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
453code << " Interp" << (dim > 1 ? "Tensor" : "") << dim << "d(num_comp_in_" << i << ", P_in_" << … in CeedOperatorBuildKernel_Sycl_gen()
458 code << " CeedScalar r_t_" << i << "[num_comp_in_" << i << "*Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
459code << " Interp" << (dim > 1 ? "Tensor" : "") << dim << "d(num_comp_in_" << i << ", P_in_" << … in CeedOperatorBuildKernel_Sycl_gen()
466 code << " CeedScalar r_t_" << i << "[num_comp_in_" << i << "*DIM*Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
467code << " Grad" << (dim > 1 ? "Tensor" : "") << (dim == 3 && Q_1d >= P_1d ? "Collocated" : "") … in CeedOperatorBuildKernel_Sycl_gen()
474 code << " CeedScalar r_t_" << i << "[Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
478code << " Weight" << (dim > 1 ? "Tensor" : "") << dim << "d(Q_1D, W, r_t_" << i << ");\n"; in CeedOperatorBuildKernel_Sycl_gen()
489 code << "\n // -- Output field setup --\n"; in CeedOperatorBuildKernel_Sycl_gen()
491 code << "\n // ---- Output field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
496 code << " CeedScalar r_tt_" << i << "[num_comp_out_" << i << "*Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
497 code << " for (CeedInt i = 0; i < num_comp_out_" << i << "; i++) {\n"; in CeedOperatorBuildKernel_Sycl_gen()
498 code << " for (CeedInt j = 0; j < Q_1D; ++j) {\n"; in CeedOperatorBuildKernel_Sycl_gen()
499 code << " r_tt_" << i << "[j + i*Q_1D] = 0.0;\n"; in CeedOperatorBuildKernel_Sycl_gen()
500 code << " }\n"; in CeedOperatorBuildKernel_Sycl_gen()
501 code << " }\n"; in CeedOperatorBuildKernel_Sycl_gen()
503 code << " CeedScalar r_tt_" << i << "[num_comp_out_" << i << "*DIM*Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
507 code << " CeedScalar r_tt_" << i << "[num_comp_out_" << i << "*Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
512 code << "\n // Note: Using planes of 3D elements\n"; in CeedOperatorBuildKernel_Sycl_gen()
513 code << " for (CeedInt q = 0; q < Q_1D; q++) {\n"; in CeedOperatorBuildKernel_Sycl_gen()
514 code << " // -- Input fields --\n"; in CeedOperatorBuildKernel_Sycl_gen()
516 code << " // ---- Input field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
520 code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
525 code << " CeedScalar r_q_" << i << "[num_comp_in_" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
533 code << " const CeedInt l_size_in_" << i << " = " << l_size << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
535 code << " // CompStride: " << comp_stride << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
538 code << " readSliceQuadsOffset" in CeedOperatorBuildKernel_Sycl_gen()
553code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; in CeedOperatorBuildKernel_Sycl_gen()
554 code << " readSliceQuadsStrided" in CeedOperatorBuildKernel_Sycl_gen()
561 code << " CeedScalar r_q_" << i << "[num_comp_in_" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
562 code << " for (CeedInt j = 0; j < num_comp_in_" << i << " ; ++j) {\n"; in CeedOperatorBuildKernel_Sycl_gen()
563 code << " r_q_" << i << "[j] = r_t_" << i << "[q + j*Q_1D];\n"; in CeedOperatorBuildKernel_Sycl_gen()
564 code << " }\n"; in CeedOperatorBuildKernel_Sycl_gen()
567 code << " CeedScalar r_q_" << i << "[num_comp_in_" << i << "*DIM];\n"; in CeedOperatorBuildKernel_Sycl_gen()
568code << " gradCollo3d(num_comp_in_" << i << ", Q_1D, q, r_t_" << i << ", s_G_in_" << i << ", … in CeedOperatorBuildKernel_Sycl_gen()
571 code << " CeedScalar r_q_" << i << "[1];\n"; in CeedOperatorBuildKernel_Sycl_gen()
572 code << " r_q_" << i << "[0] = r_t_" << i << "[q];\n"; in CeedOperatorBuildKernel_Sycl_gen()
580 code << "\n // -- Output fields --\n"; in CeedOperatorBuildKernel_Sycl_gen()
582 code << " // ---- Output field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
587 code << " CeedScalar r_qq_" << i << "[num_comp_out_" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
590 code << " CeedScalar r_qq_" << i << "[num_comp_out_" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
593 code << " CeedScalar r_qq_" << i << "[num_comp_out_" << i << "*DIM];\n"; in CeedOperatorBuildKernel_Sycl_gen()
604 code << "\n // Note: Using full elements\n"; in CeedOperatorBuildKernel_Sycl_gen()
605 code << " // -- Input fields --\n"; in CeedOperatorBuildKernel_Sycl_gen()
607 code << " // ---- Input field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
608 code << " private CeedScalar* r_q_" << i << " = r_t_" << i << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
610 code << " // -- Output fields --\n"; in CeedOperatorBuildKernel_Sycl_gen()
612 code << " // ---- Output field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
613 code << " private CeedScalar* r_qq_" << i << " = r_tt_" << i << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
617 code << "\n // -- QFunction Inputs and outputs --\n"; in CeedOperatorBuildKernel_Sycl_gen()
618 code << " const CeedScalar * in[" << num_input_fields << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
620 code << " // ---- Input field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
621 code << " in[" << i << "] = r_q_" << i << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
623 code << " CeedScalar * out[" << num_output_fields << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
625 code << " // ---- Output field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
626 code << " out[" << i << "] = r_qq_" << i << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
629 code << "\n // -- Apply QFunction --\n"; in CeedOperatorBuildKernel_Sycl_gen()
630 code << " " << qfunction_name << "(ctx, "; in CeedOperatorBuildKernel_Sycl_gen()
632 code << "1"; in CeedOperatorBuildKernel_Sycl_gen()
634 code << "Q_1D"; in CeedOperatorBuildKernel_Sycl_gen()
636 code << ", in, out);\n"; in CeedOperatorBuildKernel_Sycl_gen()
640 code << " // -- Output fields --\n"; in CeedOperatorBuildKernel_Sycl_gen()
642 code << " // ---- Output field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
645 code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
648 code << " for (CeedInt j = 0; j < num_comp_out_" << i << " ; ++j) {\n"; in CeedOperatorBuildKernel_Sycl_gen()
649 code << " r_tt_" << i << "[q + j*Q_1D] = r_qq_" << i << "[j];\n"; in CeedOperatorBuildKernel_Sycl_gen()
650 code << " }\n"; in CeedOperatorBuildKernel_Sycl_gen()
653 code << " for (CeedInt j = 0; j < num_comp_out_" << i << " ; ++j) {\n"; in CeedOperatorBuildKernel_Sycl_gen()
654 code << " r_tt_" << i << "[q + j*Q_1D] = r_qq_" << i << "[j];\n"; in CeedOperatorBuildKernel_Sycl_gen()
655 code << " }\n"; in CeedOperatorBuildKernel_Sycl_gen()
658code << " gradColloTranspose3d(num_comp_out_" << i << ",Q_1D, q, r_qq_" << i << ", s_G_out_" … in CeedOperatorBuildKernel_Sycl_gen()
669 code << " }\n"; in CeedOperatorBuildKernel_Sycl_gen()
674 code << "\n // -- Output field basis action and restrictions --\n"; in CeedOperatorBuildKernel_Sycl_gen()
676 code << " // ---- Output field " << i << " ----\n"; in CeedOperatorBuildKernel_Sycl_gen()
683 code << " // EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
686 code << " private CeedScalar* r_v_" << i << " = r_tt_" << i << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
689 code << " CeedScalar r_v_" << i << "[num_comp_out_" << i << "*P_out_" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
690code << " InterpTranspose" << (dim > 1 ? "Tensor" : "") << dim << "d(num_comp_out_" << i << ",P… in CeedOperatorBuildKernel_Sycl_gen()
694 code << " CeedScalar r_v_" << i << "[num_comp_out_" << i << "*P_out_" << i << "];\n"; in CeedOperatorBuildKernel_Sycl_gen()
696code << " InterpTranspose" << (dim > 1 ? "Tensor" : "") << dim << "d(num_comp_out_" << i << ",P… in CeedOperatorBuildKernel_Sycl_gen()
702code << " GradTranspose" << (dim > 1 ? "Tensor" : "") << (dim == 3 && Q_1d >= P_1d ? "Collocate… in CeedOperatorBuildKernel_Sycl_gen()
728 code << " const CeedInt l_size_out_" << i << " = " << l_size << ";\n"; in CeedOperatorBuildKernel_Sycl_gen()
730 code << " // CompStride: " << comp_stride << "\n"; in CeedOperatorBuildKernel_Sycl_gen()
733code << " writeDofsOffset" << dim << "d(num_comp_out_" << i << ", " << comp_stride << ", P_out_… in CeedOperatorBuildKernel_Sycl_gen()
746code << " // Strides: {" << strides[0] << ", " << strides[1] << ", " << strides[2] << "}\n"; in CeedOperatorBuildKernel_Sycl_gen()
747code << " writeDofsStrided" << dim << "d(num_comp_out_" << i << ",P_out_" << i << "," << stride… in CeedOperatorBuildKernel_Sycl_gen()
753 code << " }\n"; in CeedOperatorBuildKernel_Sycl_gen()
754 code << "}\n"; in CeedOperatorBuildKernel_Sycl_gen()
755 code << "// -----------------------------------------------------------------------------\n\n"; in CeedOperatorBuildKernel_Sycl_gen()
770 CeedDebug(ceed, code.str().c_str()); in CeedOperatorBuildKernel_Sycl_gen()
779 CeedCallBackend(CeedBuildModule_Sycl(ceed, code.str(), &impl->sycl_module, jit_constants)); in CeedOperatorBuildKernel_Sycl_gen()