Lines Matching refs:code
207 static int CeedOperatorBuildKernelFieldData_Hip_gen(std::ostringstream &code, CeedOperator_Hip_gen … in CeedOperatorBuildKernelFieldData_Hip_gen() argument
237 …code << tab << "// -- " << (is_input ? "Input" : "Output") << " field " << i << ": " << field_name… in CeedOperatorBuildKernelFieldData_Hip_gen()
255 code << tab << "const CeedInt dim" << var_suffix << " = " << dim << ";\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
260 …code << tab << "const CeedInt P" << var_suffix << " = " << (basis == CEED_BASIS_NONE ? Q : P) << "… in CeedOperatorBuildKernelFieldData_Hip_gen()
262 …code << tab << "const CeedInt " << P_name << " = " << (basis == CEED_BASIS_NONE ? Q_1d : P_1d) << … in CeedOperatorBuildKernelFieldData_Hip_gen()
264 code << tab << "const CeedInt num_comp" << var_suffix << " = " << num_comp << ";\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
268 code << tab << "// EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
297 code << tab << "CeedScalar *s_B" << var_suffix << " = " << reuse_var << ";\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
303 code << tab << "CeedScalar *s_B" << var_suffix << " = NULL;\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
305 …code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\… in CeedOperatorBuildKernelFieldData_Hip_gen()
306 …code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << … in CeedOperatorBuildKernelFieldData_Hip_gen()
336 code << tab << "CeedScalar *s_B" << var_suffix << " = " << reuse_var << ";\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
342 code << tab << "CeedScalar *s_B" << var_suffix << " = NULL;\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
344 …code << tab << "__shared__ CeedScalar s_B" << var_suffix << "[" << P_name << "*" << Q_name << "];\… in CeedOperatorBuildKernelFieldData_Hip_gen()
345 …code << tab << "LoadMatrix<" << P_name << ", " << Q_name << ">(data, B." << option_name << "[" << … in CeedOperatorBuildKernelFieldData_Hip_gen()
356 code << tab << "CeedScalar *s_G" << var_suffix << " = " << reuse_var << ";\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
358 code << tab << "CeedScalar *s_G" << var_suffix << " = NULL;\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
360 …code << tab << "__shared__ CeedScalar s_G" << var_suffix << "[" << Q_name << "*" << Q_name << "];\… in CeedOperatorBuildKernelFieldData_Hip_gen()
361 …code << tab << "LoadMatrix<" << Q_name << ", " << Q_name << ">(data, G." << option_name << "[" << … in CeedOperatorBuildKernelFieldData_Hip_gen()
372 code << tab << "CeedScalar *s_G" << var_suffix << " = " << reuse_var << ";\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
374 code << tab << "CeedScalar *s_G" << var_suffix << " = NULL;\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
376 …code << tab << "__shared__ CeedScalar s_G" << var_suffix << "[" << Q_name << "*" << Q_name << "];\… in CeedOperatorBuildKernelFieldData_Hip_gen()
377 …code << tab << "LoadMatrix<" << Q_name << ", " << Q_name << ">(data, G." << option_name << "[" << … in CeedOperatorBuildKernelFieldData_Hip_gen()
383 code << tab << "CeedScalar *s_G" << var_suffix << " = " << reuse_var << ";\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
385 code << tab << "CeedScalar *s_G" << var_suffix << " = NULL;\n"; in CeedOperatorBuildKernelFieldData_Hip_gen()
387 …code << tab << "__shared__ CeedScalar s_G" << var_suffix << "[" << P_name << "*" << Q_name << (is_… in CeedOperatorBuildKernelFieldData_Hip_gen()
389 …code << tab << "LoadMatrix<" << P_name << ", " << Q_name << (is_tensor ? "" : "*dim") << (is_tenso… in CeedOperatorBuildKernelFieldData_Hip_gen()
410 static int CeedOperatorBuildKernelRestriction_Hip_gen(std::ostringstream &code, CeedOperator_Hip_ge… in CeedOperatorBuildKernelRestriction_Hip_gen() argument
440 code << tab << "CeedScalar *r_e" << var_suffix << " = " << buffer_name << ";\n"; in CeedOperatorBuildKernelRestriction_Hip_gen()
444 …code << tab << "CeedScalar r_e" << var_suffix << "[num_comp" << var_suffix << "*" << P_name << "];… in CeedOperatorBuildKernelRestriction_Hip_gen()
447 code << tab << "CeedScalar *r_e" << var_suffix << " = r_e_scratch;\n"; in CeedOperatorBuildKernelRestriction_Hip_gen()
454 code << tab << "const CeedInt l_size" << var_suffix << " = " << l_size << ";\n"; in CeedOperatorBuildKernelRestriction_Hip_gen()
456 code << tab << "const CeedInt comp_stride" << var_suffix << " = " << comp_stride << ";\n"; in CeedOperatorBuildKernelRestriction_Hip_gen()
458 …code << tab << "ReadLVecStandard" << (is_all_tensor ? max_dim : 1) << "d<num_comp" << var_suffix <… in CeedOperatorBuildKernelRestriction_Hip_gen()
474 …code << tab << "const CeedInt strides" << var_suffix << "_0 = " << strides[0] << ", strides" << va… in CeedOperatorBuildKernelRestriction_Hip_gen()
476 …code << tab << "ReadLVecStrided" << (is_all_tensor ? max_dim : 1) << "d<num_comp" << var_suffix <<… in CeedOperatorBuildKernelRestriction_Hip_gen()
485 code << tab << "const CeedInt comp_stride" << var_suffix << " = " << comp_stride << ";\n"; in CeedOperatorBuildKernelRestriction_Hip_gen()
503 code << tab << "const CeedInt l_size" << var_suffix << " = " << l_size << ";\n"; in CeedOperatorBuildKernelRestriction_Hip_gen()
505 code << tab << "const CeedInt comp_stride" << var_suffix << " = " << comp_stride << ";\n"; in CeedOperatorBuildKernelRestriction_Hip_gen()
507 …code << tab << "WriteLVecStandard" << (is_all_tensor ? max_dim : 1) << "d<num_comp" << var_suffix … in CeedOperatorBuildKernelRestriction_Hip_gen()
523 …code << tab << "const CeedInt strides" << var_suffix << "_0 = " << strides[0] << ", strides" << va… in CeedOperatorBuildKernelRestriction_Hip_gen()
525 …code << tab << "WriteLVecStrided" << (is_all_tensor ? max_dim : 1) << "d<num_comp" << var_suffix <… in CeedOperatorBuildKernelRestriction_Hip_gen()
547 static int CeedOperatorBuildKernelBasis_Hip_gen(std::ostringstream &code, CeedOperator_Hip_gen *dat… in CeedOperatorBuildKernelBasis_Hip_gen() argument
577 code << tab << "// EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernelBasis_Hip_gen()
582 code << tab << "CeedScalar *r_q" << var_suffix << " = r_e" << var_suffix << ";\n"; in CeedOperatorBuildKernelBasis_Hip_gen()
589 …code << tab << "CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (dim >= 3 ? Q… in CeedOperatorBuildKernelBasis_Hip_gen()
590 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
598 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_all_tenso… in CeedOperatorBuildKernelBasis_Hip_gen()
599 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
607 …code << tab << "CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (dim >= 3 ? Q… in CeedOperatorBuildKernelBasis_Hip_gen()
608 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
614 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << Q_name << "];… in CeedOperatorBuildKernelBasis_Hip_gen()
615 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
624 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix… in CeedOperatorBuildKernelBasis_Hip_gen()
626 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
631 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix… in CeedOperatorBuildKernelBasis_Hip_gen()
632 …code << tab << function_name << "<num_comp" << var_suffix << ", dim" << var_suffix << ", " << P_na… in CeedOperatorBuildKernelBasis_Hip_gen()
638 code << tab << "// Nothing to do AtPoints\n"; in CeedOperatorBuildKernelBasis_Hip_gen()
645 …code << tab << "CeedScalar r_q" << var_suffix << "[" << (is_all_tensor && (dim >= 3) ? Q_name : "1… in CeedOperatorBuildKernelBasis_Hip_gen()
648 …code << tab << function_name << "<" << P_name << ", " << Q_name << ">(data, W, r_q" << var_suffix … in CeedOperatorBuildKernelBasis_Hip_gen()
661 code << tab << "CeedScalar *r_e" << var_suffix << " = r_q" << var_suffix << ";\n"; in CeedOperatorBuildKernelBasis_Hip_gen()
664 code << tab << "CeedScalar *r_e" << var_suffix << " = r_e_scratch;\n"; in CeedOperatorBuildKernelBasis_Hip_gen()
668 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
677 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
682 code << tab << "CeedScalar *r_e" << var_suffix << " = r_e_scratch;\n"; in CeedOperatorBuildKernelBasis_Hip_gen()
686 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
692 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
702 …code << tab << function_name << "<num_comp" << var_suffix << ", " << P_name << ", " << Q_name << "… in CeedOperatorBuildKernelBasis_Hip_gen()
707 …code << tab << function_name << "<num_comp" << var_suffix << ", dim" << var_suffix << ", " << P_na… in CeedOperatorBuildKernelBasis_Hip_gen()
727 static int CeedOperatorBuildKernelQFunction_Hip_gen(std::ostringstream &code, CeedOperator_Hip_gen … in CeedOperatorBuildKernelQFunction_Hip_gen() argument
738 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
739 code << tab << "// -- Output field setup\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
745 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
750 code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
752 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_all_tenso… in CeedOperatorBuildKernelQFunction_Hip_gen()
759 …code << tab << "CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (max_dim >= 3… in CeedOperatorBuildKernelQFunction_Hip_gen()
760 …code << tab << "for (CeedInt i = 0; i < num_comp" << var_suffix << "*" << (max_dim >= 3 ? Q_name :… in CeedOperatorBuildKernelQFunction_Hip_gen()
763 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << (is_all_tenso… in CeedOperatorBuildKernelQFunction_Hip_gen()
770 …code << tab << "CeedScalar r_c" << var_suffix << "[num_comp" << var_suffix << "*" << (max_dim >= 3… in CeedOperatorBuildKernelQFunction_Hip_gen()
771 …code << tab << "for (CeedInt i = 0; i < num_comp" << var_suffix << "*" << (max_dim >= 3 ? Q_name :… in CeedOperatorBuildKernelQFunction_Hip_gen()
775 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*" << Q_name << "];… in CeedOperatorBuildKernelQFunction_Hip_gen()
776 …code << tab << "for (CeedInt i = 0; i < num_comp" << var_suffix << "*" << Q_name << "; i++) r_q" <… in CeedOperatorBuildKernelQFunction_Hip_gen()
778 …code << tab << "CeedScalar r_q" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix… in CeedOperatorBuildKernelQFunction_Hip_gen()
794 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
795 code << tab << "// Note: Using batches of points\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
796 …code << tab << "const CeedInt point_loop_bound = (blockDim.x*blockDim.y) * ceil((1.0*max_num_point… in CeedOperatorBuildKernelQFunction_Hip_gen()
797 code << tab << "#pragma unroll\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
798 …code << tab << "for (CeedInt i = threadIdx.x + threadIdx.y*blockDim.x; i < point_loop_bound; i += … in CeedOperatorBuildKernelQFunction_Hip_gen()
800 code << tab << "const CeedInt p = i % max_num_points;\n\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
802 code << tab << "// -- Coordinates\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
803 code << tab << "CeedScalar r_x[max_dim];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
804 …code << tab << "ReadPoint<max_dim, coords_comp_stride, max_num_points>(data, elem, p, max_num_poin… in CeedOperatorBuildKernelQFunction_Hip_gen()
806 code << tab << "// -- Input fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
813 code << tab << "// ---- Input field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
816 code << tab << "// EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
819 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
820 code << tab << "ReadPoint<num_comp" << var_suffix << ", comp_stride" << var_suffix in CeedOperatorBuildKernelQFunction_Hip_gen()
824 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
825 …code << tab << "InterpAtPoints" << max_dim << "d<num_comp" << var_suffix << ", max_num_points, " <… in CeedOperatorBuildKernelQFunction_Hip_gen()
829 …code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix… in CeedOperatorBuildKernelQFunction_Hip_gen()
830 …code << tab << "GradAtPoints" << max_dim << "d<num_comp" << var_suffix << ", max_num_points, " << … in CeedOperatorBuildKernelQFunction_Hip_gen()
834 code << tab << "CeedScalar r_s" << var_suffix << "[1];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
835 code << tab << "r_s" << var_suffix << "[0] = 1.0;\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
844 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
845 code << tab << "// -- Output fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
851 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
856 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
859 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
862 …code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix… in CeedOperatorBuildKernelQFunction_Hip_gen()
876 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
877 code << tab << "// Note: Using planes of 3D elements\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
878 code << tab << "#pragma unroll\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
879 code << tab << "for (CeedInt q = 0; q < " << Q_name << "; q++) {\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
881 code << tab << "// -- Input fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
887 code << tab << "// ---- Input field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
890 code << tab << "// EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
895 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
911 …code << tab << "const CeedInt strides" << var_suffix << "_0 = " << strides[0] << ", strides" << va… in CeedOperatorBuildKernelQFunction_Hip_gen()
913 …code << tab << "ReadEVecSliceStrided3d<num_comp" << var_suffix << ", " << Q_name << ", strides" <<… in CeedOperatorBuildKernelQFunction_Hip_gen()
921 code << tab << "const CeedInt l_size" << var_suffix << " = " << l_size << ";\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
923 … code << tab << "const CeedInt comp_stride" << var_suffix << " = " << comp_stride << ";\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
926 …code << tab << "ReadEVecSliceStandard3d<num_comp" << var_suffix << ", comp_stride" << var_suffix <… in CeedOperatorBuildKernelQFunction_Hip_gen()
932 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
933 code << tab << "for (CeedInt j = 0; j < num_comp" << var_suffix << "; j++) {\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
935 … code << tab << "r_s" << var_suffix << "[j] = r_q" << var_suffix << "[q + j*" << Q_name << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
937 code << tab << "}\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
940 …code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix… in CeedOperatorBuildKernelQFunction_Hip_gen()
941 …code << tab << "GradColloSlice3d<num_comp" << var_suffix << ", " << Q_name << ", OP_T_1D>(data, q,… in CeedOperatorBuildKernelQFunction_Hip_gen()
945 code << tab << "CeedScalar r_s" << var_suffix << "[1];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
946 code << tab << "r_s" << var_suffix << "[0] = r_q" << var_suffix << "[q];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
955 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
956 code << tab << "// -- Output fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
962 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
967 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
970 code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
973 …code << tab << "CeedScalar r_s" << var_suffix << "[num_comp" << var_suffix << "*dim" << var_suffix… in CeedOperatorBuildKernelQFunction_Hip_gen()
985 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
986 code << tab << "// Note: Using full elements\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
987 code << tab << "{\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
989 code << tab << "// -- Input fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
994 code << tab << "// ---- Input field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
995 code << tab << "CeedScalar *r_s_in_" << i << " = r_q_in_" << i << ";\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
997 code << tab << "// -- Output fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1002 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1003 code << tab << "CeedScalar *r_s_out_" << i << " = r_q_out_" << i << ";\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1008 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1009 code << tab << "// -- QFunction inputs and outputs\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1010 code << tab << "// ---- Inputs\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1011 code << tab << "CeedScalar *inputs[" << CeedIntMax(num_input_fields, 1) << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1016 code << tab << "// ------ Input field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1017 code << tab << "inputs[" << i << "] = r_s_in_" << i << ";\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1019 code << tab << "// ---- Outputs\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1020 code << tab << "CeedScalar *outputs[" << CeedIntMax(num_output_fields, 1) << "];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1025 code << tab << "// ------ Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1026 code << tab << "outputs[" << i << "] = r_s_out_" << i << ";\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1030 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1031 code << tab << "// -- Apply QFunction\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1032 code << tab << "" << qfunction_name << "(ctx, "; in CeedOperatorBuildKernelQFunction_Hip_gen()
1034 code << "1"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1036 code << Q_name; in CeedOperatorBuildKernelQFunction_Hip_gen()
1038 code << ", inputs, outputs);\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1042 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1043 code << tab << "// -- Output fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1050 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1053 code << tab << "// EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1063 code << tab << "const CeedInt comp_stride" << var_suffix << " = " << comp_stride << ";\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1064 code << tab << "WritePoint<num_comp" << var_suffix << ", comp_stride" << var_suffix in CeedOperatorBuildKernelQFunction_Hip_gen()
1070 code << tab << "if (i >= points.num_per_elem[elem]) {\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1072 …code << tab << "for (CeedInt j = 0; j < num_comp" << var_suffix << "; j++) r_s" << var_suffix << "… in CeedOperatorBuildKernelQFunction_Hip_gen()
1074 code << tab << "}\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1075 …code << tab << "InterpTransposeAtPoints" << max_dim << "d<num_comp" << var_suffix << ", max_num_po… in CeedOperatorBuildKernelQFunction_Hip_gen()
1079 code << tab << "if (i >= points.num_per_elem[elem]) {\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1081 …code << tab << "for (CeedInt j = 0; j < num_comp" << var_suffix << "*dim" << var_suffix << "; j++)… in CeedOperatorBuildKernelQFunction_Hip_gen()
1083 code << tab << "}\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1084 …code << tab << "GradTransposeAtPoints" << max_dim << "d<num_comp" << var_suffix << ", max_num_poin… in CeedOperatorBuildKernelQFunction_Hip_gen()
1098 code << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1099 code << tab << "// -- Output fields\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1106 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1109 code << tab << "// EvalMode: " << CeedEvalModes[eval_mode] << "\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1112 code << tab << "for (CeedInt j = 0; j < num_comp" << var_suffix << " ; j++) {\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1114 … code << tab << "r_q" << var_suffix << "[q + j*" << Q_name << "] = r_s" << var_suffix << "[j];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1116 code << tab << "}\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1119 code << tab << "for (CeedInt j = 0; j < num_comp" << var_suffix << " ; j++) {\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1121 … code << tab << "r_q" << var_suffix << "[q + j*" << Q_name << "] = r_s" << var_suffix << "[j];\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1123 code << tab << "}\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1126 …code << tab << "GradColloSliceTranspose3d<num_comp" << var_suffix << ", " << Q_name << ", OP_T_1D>… in CeedOperatorBuildKernelQFunction_Hip_gen()
1140 code << tab << "}\n"; in CeedOperatorBuildKernelQFunction_Hip_gen()
1156 std::ostringstream code; in CeedOperatorBuildKernel_Hip_gen() local
1284 code << tab << "// Tensor basis source\n"; in CeedOperatorBuildKernel_Hip_gen()
1285 code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-tensor-templates.h>\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1288 code << tab << "// Non-tensor basis source\n"; in CeedOperatorBuildKernel_Hip_gen()
1289 code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-nontensor-templates.h>\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1292 code << tab << "// AtPoints basis source\n"; in CeedOperatorBuildKernel_Hip_gen()
1293 … code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-tensor-at-points-templates.h>\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1296 code << tab << "// Tensor basis source\n"; in CeedOperatorBuildKernel_Hip_gen()
1297 … code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-tensor-flattened-templates.h>\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1299 code << tab << "// CodeGen operator source\n"; in CeedOperatorBuildKernel_Hip_gen()
1300 code << tab << "#include <ceed/jit-source/hip/hip-gen-templates.h>\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1309 code << "\n" << tab << "#undef CEED_Q_VLA\n"; in CeedOperatorBuildKernel_Hip_gen()
1311 code << tab << "#define CEED_Q_VLA 1\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1313 code << tab << "#define CEED_Q_VLA " << Q_1d << "\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1323 code << tab << "// User QFunction source\n"; in CeedOperatorBuildKernel_Hip_gen()
1324 code << tab << "#include \"" << source_path << "\"\n\n"; in CeedOperatorBuildKernel_Hip_gen()
1328 …code << "\n" << tab << "// -----------------------------------------------------------------------… in CeedOperatorBuildKernel_Hip_gen()
1329 code << tab << "// Operator Kernel\n"; in CeedOperatorBuildKernel_Hip_gen()
1330 code << tab << "// \n"; in CeedOperatorBuildKernel_Hip_gen()
1331 code << tab << "// d_[in,out]_i: CeedVector device array\n"; in CeedOperatorBuildKernel_Hip_gen()
1332 code << tab << "// r_[in,out]_e_i: Element vector register\n"; in CeedOperatorBuildKernel_Hip_gen()
1333 code << tab << "// r_[in,out]_q_i: Quadrature space vector register\n"; in CeedOperatorBuildKernel_Hip_gen()
1334 code << tab << "// r_[in,out]_c_i: AtPoints Chebyshev coefficients register\n"; in CeedOperatorBuildKernel_Hip_gen()
1335 code << tab << "// r_[in,out]_s_i: Quadrature space slice vector register\n"; in CeedOperatorBuildKernel_Hip_gen()
1336 code << tab << "// \n"; in CeedOperatorBuildKernel_Hip_gen()
1337 code << tab << "// s_B_[in,out]_i: Interpolation matrix, shared memory\n"; in CeedOperatorBuildKernel_Hip_gen()
1338 code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n"; in CeedOperatorBuildKernel_Hip_gen()
1339 …code << tab << "// -----------------------------------------------------------------------------\n… in CeedOperatorBuildKernel_Hip_gen()
1340 code << tab << "extern \"C\" __launch_bounds__(BLOCK_SIZE)\n"; in CeedOperatorBuildKernel_Hip_gen()
1341 code << "__global__ void " << operator_name in CeedOperatorBuildKernel_Hip_gen()
1351 … code << tab << "const CeedScalar *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n"; in CeedOperatorBuildKernel_Hip_gen()
1355 code << tab << "CeedScalar *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n"; in CeedOperatorBuildKernel_Hip_gen()
1358 code << tab << "const CeedInt max_dim = " << max_dim << ";\n"; in CeedOperatorBuildKernel_Hip_gen()
1360 code << tab << "const CeedInt Q = " << Q << ";\n"; in CeedOperatorBuildKernel_Hip_gen()
1363 code << tab << "const CeedInt Q_1d = " << Q_1d << ";\n"; in CeedOperatorBuildKernel_Hip_gen()
1366 code << tab << "const CeedInt max_num_points = " << max_num_points << ";\n"; in CeedOperatorBuildKernel_Hip_gen()
1367 code << tab << "const CeedInt coords_comp_stride = " << coords_comp_stride << ";\n"; in CeedOperatorBuildKernel_Hip_gen()
1371 code << tab << "extern __shared__ CeedScalar slice[];\n"; in CeedOperatorBuildKernel_Hip_gen()
1372 code << tab << "SharedData_Hip data;\n"; in CeedOperatorBuildKernel_Hip_gen()
1373 code << tab << "data.t_id_x = threadIdx.x;\n"; in CeedOperatorBuildKernel_Hip_gen()
1374 code << tab << "data.t_id_y = threadIdx.y;\n"; in CeedOperatorBuildKernel_Hip_gen()
1375 code << tab << "data.t_id_z = threadIdx.z;\n"; in CeedOperatorBuildKernel_Hip_gen()
1376 …code << tab << "data.t_id = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*blockDim.y*blockD… in CeedOperatorBuildKernel_Hip_gen()
1377 …code << tab << "data.slice = slice + data.t_id_z*OP_T_1D" << ((!is_all_tensor || max_dim == 1) ? … in CeedOperatorBuildKernel_Hip_gen()
1484 code << "\n" << tab << "// Input field constants and basis data\n"; in CeedOperatorBuildKernel_Hip_gen()
1486 …CeedCallBackend(CeedOperatorBuildKernelFieldData_Hip_gen(code, data, tab, i, op_input_fields[i], q… in CeedOperatorBuildKernel_Hip_gen()
1489 code << "\n" << tab << "// Output field constants and basis data\n"; in CeedOperatorBuildKernel_Hip_gen()
1491 …CeedCallBackend(CeedOperatorBuildKernelFieldData_Hip_gen(code, data, tab, i, op_output_fields[i], … in CeedOperatorBuildKernel_Hip_gen()
1496 code << "\n" << tab << "// Element loop\n"; in CeedOperatorBuildKernel_Hip_gen()
1497 code << tab << "__syncthreads();\n"; in CeedOperatorBuildKernel_Hip_gen()
1498 …code << tab << "for (CeedInt elem = blockIdx.x*blockDim.z + threadIdx.z; elem < num_elem; elem += … in CeedOperatorBuildKernel_Hip_gen()
1532 code << tab << "// Scratch restriction buffer space\n"; in CeedOperatorBuildKernel_Hip_gen()
1533 code << tab << "CeedScalar r_e_scratch[" << max_rstr_buffer_size << "];\n"; in CeedOperatorBuildKernel_Hip_gen()
1580 code << "\n" << tab << "// -- Input field restrictions and basis actions\n"; in CeedOperatorBuildKernel_Hip_gen()
1586 code << tab << "// ---- Input field " << f << ": " << field_name << "\n"; in CeedOperatorBuildKernel_Hip_gen()
1589 …CeedCallBackend(CeedOperatorBuildKernelRestriction_Hip_gen(code, data, tab, f, field_rstr_in_buffe… in CeedOperatorBuildKernel_Hip_gen()
1593 …CeedCallBackend(CeedOperatorBuildKernelBasis_Hip_gen(code, data, tab, f, op_input_fields[f], qf_in… in CeedOperatorBuildKernel_Hip_gen()
1598 …CeedCallBackend(CeedOperatorBuildKernelQFunction_Hip_gen(code, data, tab, max_dim, max_num_points,… in CeedOperatorBuildKernel_Hip_gen()
1603 code << "\n" << tab << "// -- Output field basis action and restrictions\n"; in CeedOperatorBuildKernel_Hip_gen()
1608 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernel_Hip_gen()
1611 …CeedCallBackend(CeedOperatorBuildKernelBasis_Hip_gen(code, data, tab, i, op_output_fields[i], qf_o… in CeedOperatorBuildKernel_Hip_gen()
1615 …CeedCallBackend(CeedOperatorBuildKernelRestriction_Hip_gen(code, data, tab, i, NULL, op_output_fie… in CeedOperatorBuildKernel_Hip_gen()
1621 code << tab << "}\n"; in CeedOperatorBuildKernel_Hip_gen()
1623 code << tab << "}\n"; in CeedOperatorBuildKernel_Hip_gen()
1624 …code << tab << "// -----------------------------------------------------------------------------\n… in CeedOperatorBuildKernel_Hip_gen()
1636 …CeedCallBackend(CeedTryCompile_Hip(ceed, code.str().c_str(), &is_compile_good, &data->module, 2, "… in CeedOperatorBuildKernel_Hip_gen()
1664 std::ostringstream code; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen() local
1691 code << tab << "// Tensor basis source\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1692 code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-tensor-templates.h>\n\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1693 code << tab << "// AtPoints basis source\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1694 code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-tensor-at-points-templates.h>\n\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1695 code << tab << "// CodeGen operator source\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1696 code << tab << "#include <ceed/jit-source/hip/hip-gen-templates.h>\n\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1709 code << "\n" << tab << "#undef CEED_Q_VLA\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1710 code << tab << "#define CEED_Q_VLA 1\n\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1719 code << tab << "// User QFunction source\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1720 code << tab << "#include \"" << source_path << "\"\n\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1724 …code << "\n" << tab << "// -----------------------------------------------------------------------… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1725 code << tab << "// Operator Assembly Kernel\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1726 code << tab << "// \n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1727 code << tab << "// d_[in,out]_i: CeedVector device array\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1728 code << tab << "// r_[in,out]_e_i: Element vector register\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1729 code << tab << "// r_[in,out]_q_i: Quadrature space vector register\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1730 code << tab << "// r_[in,out]_c_i: AtPoints Chebyshev coefficients register\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1731 code << tab << "// r_[in,out]_s_i: Quadrature space slice vector register\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1732 code << tab << "// \n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1733 code << tab << "// s_B_[in,out]_i: Interpolation matrix, shared memory\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1734 code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1735 …code << tab << "// -----------------------------------------------------------------------------\n… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1736 code << tab << "extern \"C\" __global__ void " << operator_name in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1747 … code << tab << "const CeedScalar *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1751 code << tab << "CeedScalar *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1754 code << tab << "const CeedInt max_dim = " << max_dim << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1755 code << tab << "const CeedInt Q_1d = " << Q_1d << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1756 code << tab << "const CeedInt max_num_points = " << max_num_points << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1757 code << tab << "const CeedInt coords_comp_stride = " << coords_comp_stride << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1760 code << tab << "extern __shared__ CeedScalar slice[];\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1761 code << tab << "SharedData_Hip data;\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1762 code << tab << "data.t_id_x = threadIdx.x;\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1763 code << tab << "data.t_id_y = threadIdx.y;\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1764 code << tab << "data.t_id_z = threadIdx.z;\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1765 …code << tab << "data.t_id = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*blockDim.y*blockD… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1766 …code << tab << "data.slice = slice + data.t_id_z*OP_T_1D" << ((!is_all_tensor || max_dim == 1) ? … in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1842 code << "\n" << tab << "// Input field constants and basis data\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1844 …CeedCallBackend(CeedOperatorBuildKernelFieldData_Hip_gen(code, data, tab, i, op_input_fields[i], q… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1847 code << "\n" << tab << "// Output field constants and basis data\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1849 …CeedCallBackend(CeedOperatorBuildKernelFieldData_Hip_gen(code, data, tab, i, op_output_fields[i], … in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1854 code << "\n" << tab << "// Element loop\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1855 code << tab << "__syncthreads();\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1856 …code << tab << "for (CeedInt elem = blockIdx.x*blockDim.z + threadIdx.z; elem < num_elem; elem += … in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1890 code << tab << "// Scratch restriction buffer space\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1891 code << tab << "CeedScalar r_e_scratch[" << max_rstr_buffer_size << "];\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1938 code << "\n" << tab << "// -- Input field restrictions and basis actions\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1955 code << tab << "// ---- Input field " << f << ": " << field_name << "\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1960 code << tab << "// Active field - no restriction or basis action here\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1963 …code << tab << "CeedScalar r_e" << var_suffix << "[num_comp" << var_suffix << "*" << (max_dim >= 3… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1966 … code << tab << "CeedScalar *r_e" << var_suffix << " = r_e_in_" << active_field_index << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1970 …CeedCallBackend(CeedOperatorBuildKernelRestriction_Hip_gen(code, data, tab, f, field_rstr_in_buffe… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1974 …CeedCallBackend(CeedOperatorBuildKernelBasis_Hip_gen(code, data, tab, f, op_input_fields[f], qf_in… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1982 code << "\n" << tab << "// Loop over nodes in active field\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1983 …code << tab << "for (CeedInt n = 0; n < num_comp" << active_var_suffix << "*P_1d" << active_var_su… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1988 code << tab << "// Set current active node and component to 1.0\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
1989 …code << tab << "SetEVecStandard" << max_dim << "d_Single<num_comp" << active_var_suffix << ", P_1d… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2007 code << tab << "// ---- Input field " << f << ": " << field_name << "\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2010 …CeedCallBackend(CeedOperatorBuildKernelBasis_Hip_gen(code, data, tab, f, op_input_fields[f], qf_in… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2015 …CeedCallBackend(CeedOperatorBuildKernelQFunction_Hip_gen(code, data, tab, max_dim, max_num_points,… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2020 code << "\n" << tab << "// -- Output field basis action and restrictions\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2035 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2038 …CeedCallBackend(CeedOperatorBuildKernelBasis_Hip_gen(code, data, tab, i, op_output_fields[i], qf_o… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2050 code << tab << "const CeedInt l_size" << var_suffix << " = " << l_size << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2052 code << tab << "const CeedInt comp_stride" << var_suffix << " = " << comp_stride << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2053 …code << tab << "WriteLVecStandard" << max_dim << "d_Assembly<num_comp" << var_suffix << ", comp_st… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2064 code << tab << "const CeedInt l_size" << var_suffix << " = " << l_size << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2066 code << tab << "const CeedInt comp_stride" << var_suffix << " = " << comp_stride << ";\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2067 …code << tab << "WriteLVecStandard" << max_dim << "d_Single<num_comp" << var_suffix << ", comp_stri… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2074 code << "\n" << tab << "// Reset current active node and component to 0.0\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2075 …code << tab << "SetEVecStandard" << max_dim << "d_Single<num_comp" << active_var_suffix << ", P_1d… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2080 code << tab << "}\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2084 code << tab << "}\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2086 code << tab << "}\n"; in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2087 …code << tab << "// -----------------------------------------------------------------------------\n… in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2099 CeedCallBackend(CeedTryCompile_Hip(ceed, code.str().c_str(), &is_compile_good, in CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen()
2135 std::ostringstream code; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen() local
2207 code << tab << "// Tensor basis source\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2208 code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-tensor-templates.h>\n\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2211 code << tab << "// Non-tensor basis source\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2212 code << tab << "#include <ceed/jit-source/hip/hip-shared-basis-nontensor-templates.h>\n\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2215 code << "// Tensor basis source\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2216 code << "#include <ceed/jit-source/hip/hip-shared-basis-tensor-flattened-templates.h>\n\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2218 code << "// CodeGen operator source\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2219 code << "#include <ceed/jit-source/hip/hip-gen-templates.h>\n\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2228 code << "\n" << tab << "#undef CEED_Q_VLA\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2230 code << tab << "#define CEED_Q_VLA 1\n\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2232 code << tab << "#define CEED_Q_VLA " << Q_1d << "\n\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2242 code << tab << "// User QFunction source\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2243 code << tab << "#include \"" << source_path << "\"\n\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2247 …code << "\n" << tab << "// -----------------------------------------------------------------------… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2248 code << tab << "// Operator Assembly Kernel\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2249 code << tab << "// \n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2250 code << tab << "// d_[in,out]_i: CeedVector device array\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2251 code << tab << "// r_[in,out]_e_i: Element vector register\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2252 code << tab << "// r_[in,out]_q_i: Quadrature space vector register\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2253 code << tab << "// r_[in,out]_c_i: AtPoints Chebyshev coefficients register\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2254 code << tab << "// r_[in,out]_s_i: Quadrature space slice vector register\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2255 code << tab << "// \n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2256 code << tab << "// s_B_[in,out]_i: Interpolation matrix, shared memory\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2257 code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2258 …code << tab << "// -----------------------------------------------------------------------------\n… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2259 code << tab << "extern \"C\" __global__ void " << operator_name in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2270 … code << tab << "const CeedScalar *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2284 code << tab << "CeedScalar *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2288 code << tab << "const CeedInt max_dim = " << max_dim << ";\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2290 code << tab << "const CeedInt Q = " << Q << ";\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2293 code << tab << "const CeedInt Q_1d = " << Q_1d << ";\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2297 code << tab << "extern __shared__ CeedScalar slice[];\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2298 code << tab << "SharedData_Hip data;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2299 code << tab << "data.t_id_x = threadIdx.x;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2300 code << tab << "data.t_id_y = threadIdx.y;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2301 code << tab << "data.t_id_z = threadIdx.z;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2302 …code << tab << "data.t_id = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*blockDim.y*blockD… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2303 …code << tab << "data.slice = slice + data.t_id_z*OP_T_1D" << ((!is_all_tensor || max_dim == 1) ? … in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2410 code << "\n" << tab << "// Input field constants and basis data\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2412 …CeedCallBackend(CeedOperatorBuildKernelFieldData_Hip_gen(code, data, tab, i, op_input_fields[i], q… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2415 code << "\n" << tab << "// Output field constants and basis data\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2417 …CeedCallBackend(CeedOperatorBuildKernelFieldData_Hip_gen(code, data, tab, i, op_output_fields[i], … in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2422 code << "\n" << tab << "// Element loop\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2423 code << tab << "__syncthreads();\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2424 …code << tab << "for (CeedInt elem = blockIdx.x*blockDim.z + threadIdx.z; elem < num_elem; elem += … in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2458 code << tab << "// Scratch restriction buffer space\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2459 code << tab << "CeedScalar r_e_scratch[" << max_rstr_buffer_size << "];\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2506 code << "\n" << tab << "// -- Input field restrictions and basis actions\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2524 code << tab << "// ---- Input field " << f << ": " << field_name << "\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2535 … code << tab << "CeedScalar r_q_in_" << f << "[num_comp_in_" << f << "*" << "dim_in_" << f << "*" in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2538 …code << tab << "CeedScalar r_q_in_" << f << "[num_comp_in_" << f << "*" << (is_all_tensor && (max_… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2540 code << tab << "const CeedInt field_size_in_" << f << " = " << field_size << ";\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2543 …CeedCallBackend(CeedOperatorBuildKernelRestriction_Hip_gen(code, data, tab, f, field_rstr_in_buffe… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2547 …CeedCallBackend(CeedOperatorBuildKernelBasis_Hip_gen(code, data, tab, f, op_input_fields[f], qf_in… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2551 code << tab << "const CeedInt field_sizes_in[" << num_active_in << "] = {"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2553 code << "field_size_in_" << active_fields_in[i] << (i < num_active_in - 1 ? ", " : ""); in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2555 code << "};\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2556 code << tab << "CeedScalar * r_q_in[" << num_active_in << "] = {"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2558 code << "r_q_in_" << active_fields_in[i] << (i < num_active_in - 1 ? ", " : ""); in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2560 code << "};\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2581 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2582 code << tab << "const CeedInt field_size_out_" << i << " = " << field_size << ";\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2585 code << tab << "const CeedInt field_sizes_out[" << num_active_out << "] = {"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2587 code << "field_size_out_" << active_fields_out[i] << (i < num_active_out - 1 ? ", " : ""); in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2589 code << "};\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2590 code << tab << "const CeedInt total_size_out = " << qf_assembly_size_out << ";\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2593 code << "\n" << tab << "CeedInt input_offset = 0;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2594 code << tab << "// Loop over active QFunction input fields\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2595 code << tab << "const CeedInt num_active_in = " << num_active_in << ";\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2596 code << tab << "for (CeedInt a = 0; a < num_active_in; a++) {\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2600 code << "\n" << tab << "// Loop over current active input field size\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2601 code << tab << "const CeedInt field_size_in = field_sizes_in[a];\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2602 code << tab << "for (CeedInt s = 0; s < field_size_in; s++) {\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2606 code << tab << "// Set current active point and component to 1.0\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2608 code << tab << "for (CeedInt i = 0; i < Q_1d; i++) r_q_in[a][i + s * Q_1d] = 1.0;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2610 code << tab << "r_q_in[a][s] = 1.0;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2614 …CeedCallBackend(CeedOperatorBuildKernelQFunction_Hip_gen(code, data, tab, max_dim, max_num_points,… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2619 code << "\n" << tab << "// -- Output field basis action and restrictions\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2636 code << tab << "// ---- Output field " << i << ": " << field_name << "\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2641 …code << tab << "WriteLVecStandard" << (is_all_tensor ? max_dim : 1) << "d_QFAssembly<total_size_ou… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2648 code << "\n" << tab << "// Reset current active node and component to 0.0\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2650 code << tab << "for (CeedInt i = 0; i < Q_1d; i++) r_q_in[a][i + s * Q_1d] = 0.0;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2652 code << tab << "r_q_in[a][s] = 0.0;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2657 code << tab << "}\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2658 code << tab << "input_offset += field_size_in;\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2662 code << tab << "}\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2666 code << tab << "}\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2668 code << tab << "}\n"; in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2669 …code << tab << "// -----------------------------------------------------------------------------\n… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()
2681 …CeedCallBackend(CeedTryCompile_Hip(ceed, code.str().c_str(), &is_compile_good, &data->module_assem… in CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen()