| /libCEED/include/ceed/jit-source/magma/ |
| H A D | magma-common-tensor.h | 21 for (int comp = 0; comp < NUM_COMP; comp++) { in read_1d() local 22 sBuffer[comp][tx] = devptr[comp * compstride + tx]; in read_1d() 33 for (int comp = 0; comp < NUM_COMP; comp++) { in write_1d() local 34 devptr[comp * compstride + tx] = sBuffer[comp][tx]; in write_1d() 45 for (int comp = 0; comp < NUM_COMP; comp++) { in sum_1d() local 46 devptr[comp * compstride + tx] += sBuffer[comp][tx]; in sum_1d() 70 for (int comp = 0; comp < NUM_COMP; comp++) { in read_U_2d() local 74 sTmp[i * P + tx] = dU[comp * compstride + i * P + tx]; in read_U_2d() 81 rU[i_DIM][comp][i] = sTmp[tx * P + i]; in read_U_2d() 97 for (int comp = 0; comp < NUM_COMP; comp++) { in read_V_2d() local [all …]
|
| H A D | magma-basis-grad-1d.h | 29 for (int comp = 0; comp < NUM_COMP; comp++) { in magma_grad_1d_device() local 32 rv += sU[comp][i] * sT(i, tx); in magma_grad_1d_device() 34 sV[comp][tx] = rv; in magma_grad_1d_device() 63 for (int comp = 1; comp < BASIS_NUM_COMP; comp++) { in __launch_bounds__() local 64 sU[comp] = sU[comp - 1] + (1 * BASIS_P); in __launch_bounds__() 65 sV[comp] = sV[comp - 1] + (1 * BASIS_Q); in __launch_bounds__() 108 for (int comp = 1; comp < BASIS_NUM_COMP; comp++) { in __launch_bounds__() local 109 sU[comp] = sU[comp - 1] + (1 * BASIS_Q); in __launch_bounds__() 110 sV[comp] = sV[comp - 1] + (1 * BASIS_P); in __launch_bounds__() 153 for (int comp = 1; comp < BASIS_NUM_COMP; comp++) { in __launch_bounds__() local [all …]
|
| H A D | magma-basis-interp-1d.h | 29 for (int comp = 0; comp < NUM_COMP; comp++) { in magma_interp_1d_device() local 32 rv += sU[comp][i] * sT(i, tx); // sT[tx * P + i]; in magma_interp_1d_device() 34 sV[comp][tx] = rv; in magma_interp_1d_device() 63 for (int comp = 1; comp < BASIS_NUM_COMP; comp++) { in __launch_bounds__() local 64 sU[comp] = sU[comp - 1] + (1 * BASIS_P); in __launch_bounds__() 65 sV[comp] = sV[comp - 1] + (1 * BASIS_Q); in __launch_bounds__() 108 for (int comp = 1; comp < BASIS_NUM_COMP; comp++) { in __launch_bounds__() local 109 sU[comp] = sU[comp - 1] + (1 * BASIS_Q); in __launch_bounds__() 110 sV[comp] = sV[comp - 1] + (1 * BASIS_P); in __launch_bounds__() 153 for (int comp = 1; comp < BASIS_NUM_COMP; comp++) { in __launch_bounds__() local [all …]
|
| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-read-write-templates.h | 34 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadElementStrided1d() local 35 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided1d() 50 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteElementStrided1d() local 51 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 63 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in SumElementStrided1d() local 64 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided1d() 83 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadElementStrided2d() local 84 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided2d() 99 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteElementStrided2d() local 100 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() [all …]
|
| H A D | cuda-ref-restriction-curl-oriented.h | 27 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in CurlOrientedNoTranspose() local 29 value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0; in CurlOrientedNoTranspose() 30 value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d; in CurlOrientedNoTranspose() 31 …value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : … in CurlOrientedNoTranspose() 32 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedNoTranspose() 52 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in CurlOrientedUnsignedNoTranspose() local 54 value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0; in CurlOrientedUnsignedNoTranspose() 55 value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d; in CurlOrientedUnsignedNoTranspose() 56 …value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : … in CurlOrientedUnsignedNoTranspose() 57 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedUnsignedNoTranspose() [all …]
|
| H A D | cuda-gen-templates.h | 32 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadPoint() local 33 r_u[comp] = d_u[ind + comp * COMP_STRIDE]; in ReadPoint() 46 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WritePoint() local 47 d_u[ind + comp * COMP_STRIDE] += r_u[comp]; in WritePoint() 79 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + COMP_STRIDE * comp]; in ReadLVecStandard1d() local 93 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + comp * STRIDES_COMP]; in ReadLVecStrided1d() local 107 …for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[comp… in WriteLVecStandard1d() local 138 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteLVecStandard1d_Assembly() local 139 …vec_size * e_vec_size + (in_comp * NUM_COMP + comp) * P_1D * P_1D + out_node * P_1D + in_node] += … in WriteLVecStandard1d_Assembly() 153 for (CeedInt comp = 0; comp < NUM_COMP_FIELD; comp++) { in WriteLVecStandard1d_QFAssembly() local [all …]
|
| H A D | cuda-ref-restriction-offset.h | 21 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OffsetNoTranspose() local 22 …v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = u[ind + comp * RSTR_… in OffsetNoTranspose() 37 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OffsetTranspose() local 38 …atomicAdd(&v[ind + comp * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + … in OffsetTranspose() 52 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0; in OffsetTranspose() local 59 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OffsetTranspose() local 60 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in OffsetTranspose() 64 …for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[com… in OffsetTranspose() local
|
| H A D | cuda-shared-basis-tensor-templates.h | 53 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in Interp1d() local 54 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); in Interp1d() 64 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTranspose1d() local 65 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); in InterpTranspose1d() 75 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpCollocatedNodes1d() local 76 r_V[comp] = r_U[comp]; in InterpCollocatedNodes1d() 86 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTransposeCollocatedNodes1d() local 87 r_V[comp] = r_U[comp]; in InterpTransposeCollocatedNodes1d() 97 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in Grad1d() local 98 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); in Grad1d() [all …]
|
| H A D | cuda-ref-restriction-oriented.h | 23 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OrientedNoTranspose() local 24 …v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = u[ind + comp * RSTR_… in OrientedNoTranspose() 41 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OrientedTranspose() local 42 atomicAdd(&v[ind + comp * RSTR_COMP_STRIDE], in OrientedTranspose() 43 …u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * (orient ? -1.0 : 1.0… in OrientedTranspose() 58 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0; in OrientedTranspose() local 66 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OrientedTranspose() local 67 …value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * (orie… in OrientedTranspose() 71 …for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[com… in OrientedTranspose() local
|
| H A D | cuda-ref-restriction-at-points.h | 24 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in AtPointsTranspose() local 25 …atomicAdd(&v[ind + comp * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + … in AtPointsTranspose() 40 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0; in AtPointsTranspose() local 48 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in AtPointsTranspose() local 49 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in AtPointsTranspose() 53 …for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[com… in AtPointsTranspose() local
|
| H A D | cuda-shared-basis-nontensor-templates.h | 49 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpNonTensor() local 50 Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); in InterpNonTensor() 60 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTransposeNonTensor() local 61 r_V[comp] = 0.0; in InterpTransposeNonTensor() 62 ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); in InterpTransposeNonTensor() 72 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in GradNonTensor() local 73 Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]); in GradNonTensor() 84 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0; in GradTransposeNonTensor() local 86 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in GradTransposeNonTensor() local 87 …ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[com… in GradTransposeNonTensor()
|
| H A D | cuda-ref-qfunction.h | 17 for (CeedInt comp = 0; comp < SIZE; comp++) { in readQuads() local 18 r_u[comp] = d_u[quad + num_qpts * comp]; in readQuads() 27 for (CeedInt comp = 0; comp < SIZE; comp++) { in writeQuads() local 28 d_v[quad + num_qpts * comp] = r_v[comp]; in writeQuads()
|
| H A D | cuda-shared-basis-tensor-flattened-templates.h | 107 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in QPack2d() local 109 if (t_id_x < Q_1D && t_id_y < Q_1D) data.slice[t_id_x + t_id_y * T_1D] = U[comp]; in QPack2d() 111 U[comp] = data.t_id_x < (Q_1D * Q_1D) ? data.slice[new_t_id_x + new_t_id_y * T_1D] : 0.0; in QPack2d() 119 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in QUnpack2d() local 121 if (data.t_id_x < (Q_1D * Q_1D)) data.slice[old_t_id_x + old_t_id_y * T_1D] = U[comp]; in QUnpack2d() 123 U[comp] = (t_id_x < Q_1D && t_id_y < Q_1D) ? data.slice[t_id_x + t_id_y * T_1D] : 0.0; in QUnpack2d() 137 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTensor2dFlattened() local 138 ContractX2dFlattened<NUM_COMP, P_1D, Q_1D, T_1D>(data, t_id_x, t_id_y, &r_U[comp], c_B, r_t); in InterpTensor2dFlattened() 139 ContractY2dFlattened<NUM_COMP, P_1D, Q_1D, T_1D>(data, t_id_x, t_id_y, r_t, c_B, &r_V[comp]); in InterpTensor2dFlattened() 156 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTransposeTensor2dFlattened() local [all …]
|
| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-read-write-templates.h | 34 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadElementStrided1d() local 35 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided1d() 50 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteElementStrided1d() local 51 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 63 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in SumElementStrided1d() local 64 d_v[ind + comp * strides_comp] += r_v[comp]; in SumElementStrided1d() 83 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadElementStrided2d() local 84 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided2d() 99 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteElementStrided2d() local 100 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() [all …]
|
| H A D | hip-ref-restriction-curl-oriented.h | 27 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in CurlOrientedNoTranspose() local 29 value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0; in CurlOrientedNoTranspose() 30 value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d; in CurlOrientedNoTranspose() 31 …value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : … in CurlOrientedNoTranspose() 32 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedNoTranspose() 52 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in CurlOrientedUnsignedNoTranspose() local 54 value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0; in CurlOrientedUnsignedNoTranspose() 55 value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d; in CurlOrientedUnsignedNoTranspose() 56 …value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : … in CurlOrientedUnsignedNoTranspose() 57 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedUnsignedNoTranspose() [all …]
|
| H A D | hip-gen-templates.h | 32 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadPoint() local 33 r_u[comp] = d_u[ind + comp * COMP_STRIDE]; in ReadPoint() 46 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WritePoint() local 47 d_u[ind + comp * COMP_STRIDE] += r_u[comp]; in WritePoint() 79 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + COMP_STRIDE * comp]; in ReadLVecStandard1d() local 92 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_u[comp] = d_u[ind + comp * STRIDES_COMP]; in ReadLVecStrided1d() local 106 …for (CeedInt comp = 0; comp < NUM_COMP; comp++) atomicAdd(&d_v[ind + COMP_STRIDE * comp], r_v[comp… in WriteLVecStandard1d() local 137 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteLVecStandard1d_Assembly() local 138 …vec_size * e_vec_size + (in_comp * NUM_COMP + comp) * P_1D * P_1D + out_node * P_1D + in_node] += … in WriteLVecStandard1d_Assembly() 152 for (CeedInt comp = 0; comp < NUM_COMP_FIELD; comp++) { in WriteLVecStandard1d_QFAssembly() local [all …]
|
| H A D | hip-ref-restriction-offset.h | 21 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OffsetNoTranspose() local 22 …v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = u[ind + comp * RSTR_… in OffsetNoTranspose() 37 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OffsetTranspose() local 38 …atomicAdd(&v[ind + comp * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + … in OffsetTranspose() 52 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0; in OffsetTranspose() local 59 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OffsetTranspose() local 60 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in OffsetTranspose() 64 …for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[com… in OffsetTranspose() local
|
| H A D | hip-shared-basis-tensor-templates.h | 53 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in Interp1d() local 54 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); in Interp1d() 64 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTranspose1d() local 65 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); in InterpTranspose1d() 75 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpCollocatedNodes1d() local 76 r_V[comp] = r_U[comp]; in InterpCollocatedNodes1d() 86 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTransposeCollocatedNodes1d() local 87 r_V[comp] = r_U[comp]; in InterpTransposeCollocatedNodes1d() 97 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in Grad1d() local 98 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); in Grad1d() [all …]
|
| H A D | hip-ref-restriction-oriented.h | 23 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OrientedNoTranspose() local 24 …v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = u[ind + comp * RSTR_… in OrientedNoTranspose() 41 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OrientedTranspose() local 42 atomicAdd(&v[ind + comp * RSTR_COMP_STRIDE], in OrientedTranspose() 43 …u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * (orient ? -1.0 : 1.0… in OrientedTranspose() 58 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0; in OrientedTranspose() local 66 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in OrientedTranspose() local 67 …value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * (orie… in OrientedTranspose() 71 …for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[com… in OrientedTranspose() local
|
| H A D | hip-ref-restriction-at-points.h | 24 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in AtPointsTranspose() local 25 …atomicAdd(&v[ind + comp * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + … in AtPointsTranspose() 40 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0; in AtPointsTranspose() local 48 for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) { in AtPointsTranspose() local 49 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in AtPointsTranspose() 53 …for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[com… in AtPointsTranspose() local
|
| H A D | hip-shared-basis-nontensor-templates.h | 49 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpNonTensor() local 50 Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); in InterpNonTensor() 60 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTransposeNonTensor() local 61 r_V[comp] = 0.0; in InterpTransposeNonTensor() 62 ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); in InterpTransposeNonTensor() 72 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in GradNonTensor() local 73 Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]); in GradNonTensor() 84 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0; in GradTransposeNonTensor() local 86 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in GradTransposeNonTensor() local 87 …ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[com… in GradTransposeNonTensor()
|
| H A D | hip-ref-qfunction.h | 17 for (CeedInt comp = 0; comp < SIZE; comp++) { in readQuads() local 18 r_u[comp] = d_u[quad + num_qpts * comp]; in readQuads() 27 for (CeedInt comp = 0; comp < SIZE; comp++) { in writeQuads() local 28 d_v[quad + num_qpts * comp] = r_v[comp]; in writeQuads()
|
| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-gen-templates.h | 41 for (CeedInt comp = 0; comp < num_comp; ++comp) { in readDofsOffset1d() local 42 r_u[comp] = d_u[ind + strides_comp * comp]; in readDofsOffset1d() 59 for (CeedInt comp = 0; comp < num_comp; comp++) { in readDofsStrided1d() local 60 r_u[comp] = d_u[ind + comp * strides_comp]; in readDofsStrided1d() 76 for (CeedInt comp = 0; comp < num_comp; ++comp) in writeDofsOffset1d() local 77 …atomic_fetch_add_explicit(&d_v[ind + strides_comp * comp], r_v[comp], memory_order_relaxed, memory… in writeDofsOffset1d() 93 for (CeedInt comp = 0; comp < num_comp; comp++) { in writeDofsStrided1d() local 94 d_v[ind + comp * strides_comp] = r_v[comp]; in writeDofsStrided1d() 115 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + strides_comp * comp]; in readDofsOffset2d() local 132 for (CeedInt comp = 0; comp < num_comp; ++comp) r_u[comp] = d_u[ind + comp * strides_comp]; in readDofsStrided2d() local [all …]
|
| H A D | sycl-shared-basis-read-write-templates.h | 37 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadElementStrided1d() local 38 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided1d() 55 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteElementStrided1d() local 56 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided1d() 78 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadElementStrided2d() local 79 r_u[comp] = d_u[ind + comp * strides_comp]; in ReadElementStrided2d() 97 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in WriteElementStrided2d() local 98 d_v[ind + comp * strides_comp] = r_v[comp]; in WriteElementStrided2d() 121 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in ReadElementStrided3d() local 122 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; in ReadElementStrided3d() [all …]
|
| H A D | sycl-shared-basis-tensor-templates.h | 59 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in Interp1d() local 60 ContractX1d(P_1D, Q_1D, r_U + comp, s_B, r_V + comp, scratch); in Interp1d() 69 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTranspose1d() local 70 ContractTransposeX1d(P_1D, Q_1D, r_U + comp, s_B, r_V + comp, scratch); in InterpTranspose1d() 79 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in Grad1d() local 80 ContractX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch); in Grad1d() 89 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in GradTranspose1d() local 90 ContractTransposeX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch); in GradTranspose1d() 212 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { in InterpTensor2d() local 213 ContractX2d(P_1D, Q_1D, r_U + comp, s_B, r_t, scratch); in InterpTensor2d() [all …]
|