Lines Matching full:n

77                                     "#if defined(cl_khr_fp64)\n"  in PetscFEOpenCLGenerateIntegrationCode()
78 "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n" in PetscFEOpenCLGenerateIntegrationCode()
79 "#elif defined(cl_amd_fp64)\n" in PetscFEOpenCLGenerateIntegrationCode()
80 "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n" in PetscFEOpenCLGenerateIntegrationCode()
81 "#endif\n", in PetscFEOpenCLGenerateIntegrationCode()
87 "\n" in PetscFEOpenCLGenerateIntegrationCode()
88 …entsAux, __global %s *jacobianInverses, __global %s *jacobianDeterminants, __global %s *elemVec)\n" in PetscFEOpenCLGenerateIntegrationCode()
89 "{\n", in PetscFEOpenCLGenerateIntegrationCode()
93 " /* Quadrature points\n" in PetscFEOpenCLGenerateIntegrationCode()
94 " - (x1,y1,x2,y2,...) */\n" in PetscFEOpenCLGenerateIntegrationCode()
95 " const %s points[%d] = {\n", in PetscFEOpenCLGenerateIntegrationCode()
98 …scCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, points[p *… in PetscFEOpenCLGenerateIntegrationCode()
100 PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
102 " /* Quadrature weights\n" in PetscFEOpenCLGenerateIntegrationCode()
103 " - (v1,v2,...) */\n" in PetscFEOpenCLGenerateIntegrationCode()
104 " const %s weights[%d] = {\n", in PetscFEOpenCLGenerateIntegrationCode()
106 …scCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, weights[p]… in PetscFEOpenCLGenerateIntegrationCode()
107 PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
111 " /* Nodal basis function evaluations\n" in PetscFEOpenCLGenerateIntegrationCode()
112 … " - basis component is fastest varying, the basis function, then point */\n" in PetscFEOpenCLGenerateIntegrationCode()
113 " const %s Basis[%d] = {\n", in PetscFEOpenCLGenerateIntegrationCode()
117 …scCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "%g,\n", &count, T->T[0][(p… in PetscFEOpenCLGenerateIntegrationCode()
120 PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
122 "\n" in PetscFEOpenCLGenerateIntegrationCode()
123 " /* Nodal basis function derivative evaluations,\n" in PetscFEOpenCLGenerateIntegrationCode()
124 …erivative direction is fastest varying, then basis component, then basis function, then point */\n" in PetscFEOpenCLGenerateIntegrationCode()
125 " const %s%d BasisDerivatives[%d] = {\n", in PetscFEOpenCLGenerateIntegrationCode()
138 PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "),\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
142 PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, "};\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
145 … " const int dim = %d; // The spatial dimension\n" in PetscFEOpenCLGenerateIntegrationCode()
146 … " const int N_bl = %d; // The number of concurrent blocks\n" in PetscFEOpenCLGenerateIntegrationCode()
147 … " const int N_b = %d; // The number of basis functions\n" in PetscFEOpenCLGenerateIntegrationCode()
148 … " const int N_comp = %d; // The number of basis function components\n" in PetscFEOpenCLGenerateIntegrationCode()
149 … const int N_bt = N_b*N_comp; // The total number of scalar basis functions\n" in PetscFEOpenCLGenerateIntegrationCode()
150 … " const int N_q = %d; // The number of quadrature points\n" in PetscFEOpenCLGenerateIntegrationCode()
151 … // The block size, LCM(N_b*N_comp, N_q), Notice that a block is not processed simultaneously\n" in PetscFEOpenCLGenerateIntegrationCode()
152 … " const int N_t = N_bst*N_bl; // The number of threads, N_bst * N_bl\n" in PetscFEOpenCLGenerateIntegrationCode()
153 …onst int N_bc = N_t/N_comp; // The number of cells per batch (N_b*N_q*N_bl)\n" in PetscFEOpenCLGenerateIntegrationCode()
154 " const int N_sbc = N_bst / (N_q * N_comp);\n" in PetscFEOpenCLGenerateIntegrationCode()
155 " const int N_sqc = N_bst / N_bt;\n" in PetscFEOpenCLGenerateIntegrationCode()
156 " /*const int N_c = N_cb * N_bc;*/\n" in PetscFEOpenCLGenerateIntegrationCode()
157 "\n" in PetscFEOpenCLGenerateIntegrationCode()
158 " /* Calculated indices */\n" in PetscFEOpenCLGenerateIntegrationCode()
159 … " /*const int tidx = get_local_id(0) + get_local_size(0)*get_local_id(1);*/\n" in PetscFEOpenCLGenerateIntegrationCode()
160 " const int tidx = get_local_id(0);\n" in PetscFEOpenCLGenerateIntegrationCode()
161 … " const int blidx = tidx / N_bst; // Block number for this thread\n" in PetscFEOpenCLGenerateIntegrationCode()
162 … " const int bidx = tidx %% N_bt; // Basis function mapped to this thread\n" in PetscFEOpenCLGenerateIntegrationCode()
163 … " const int cidx = tidx %% N_comp; // Basis component mapped to this thread\n" in PetscFEOpenCLGenerateIntegrationCode()
164 …" const int qidx = tidx %% N_q; // Quadrature point mapped to this thread\n" in PetscFEOpenCLGenerateIntegrationCode()
165 …t int blbidx = tidx %% N_q + blidx*N_q; // Cell mapped to this thread in the basis phase\n" in PetscFEOpenCLGenerateIntegrationCode()
166 … blqidx = tidx %% N_b + blidx*N_b; // Cell mapped to this thread in the quadrature phase\n" in PetscFEOpenCLGenerateIntegrationCode()
167 … " const int gidx = get_group_id(1)*get_num_groups(0) + get_group_id(0);\n" in PetscFEOpenCLGenerateIntegrationCode()
168 " const int Goffset = gidx*N_cb*N_bc;\n", in PetscFEOpenCLGenerateIntegrationCode()
172 "\n" in PetscFEOpenCLGenerateIntegrationCode()
173 " /* Quadrature data */\n" in PetscFEOpenCLGenerateIntegrationCode()
174 … " %s w; // $w_q$, Quadrature weight at $x_q$\n" in PetscFEOpenCLGenerateIntegrationCode()
175 … phi_i[%d]; //[N_bt*N_q]; // $\\phi_i(x_q)$, Value of the basis function $i$ at $x_q$\n" in PetscFEOpenCLGenerateIntegrationCode()
176 …x_q)}{\\partial x_d}$, Value of the derivative of basis function $i$ in direction $x_d$ at $x_q$\n" in PetscFEOpenCLGenerateIntegrationCode()
177 " /* Geometric data */\n" in PetscFEOpenCLGenerateIntegrationCode()
178 … " __local %s detJ[%d]; //[N_t]; // $|J(x_q)|$, Jacobian determinant at $x_q$\n" in PetscFEOpenCLGenerateIntegrationCode()
179 … " __local %s invJ[%d];//[N_t*dim*dim]; // $J^{-1}(x_q)$, Jacobian inverse at $x_q$\n", in PetscFEOpenCLGenerateIntegrationCode()
182 " /* FEM data */\n" in PetscFEOpenCLGenerateIntegrationCode()
183 …/[N_t*N_bt]; // Coefficients $u_i$ of the field $u|_{\\mathcal{T}} = \\sum_i u_i \\phi_i$\n", in PetscFEOpenCLGenerateIntegrationCode()
186 …ts $a_i$ of the auxiliary field $a|_{\\mathcal{T}} = \\sum_i a_i \\phi^R_i$\n", &count, numeric_st… in PetscFEOpenCLGenerateIntegrationCode()
190 " /* Intermediate calculations */\n" in PetscFEOpenCLGenerateIntegrationCode()
191 … __local %s f_0[%d]; //[N_t*N_sqc]; // $f_0(u(x_q), \\nabla u(x_q)) |J(x_q)| w_q$\n", in PetscFEOpenCLGenerateIntegrationCode()
194 … f_1[%d]; //[N_t*N_sqc]; // $f_1(u(x_q), \\nabla u(x_q)) |J(x_q)| w_q$\n", &count, numeric_st… in PetscFEOpenCLGenerateIntegrationCode()
197 " /* Output data */\n" in PetscFEOpenCLGenerateIntegrationCode()
198 … " %s e_i; // Coefficient $e_i$ of the residual\n\n", in PetscFEOpenCLGenerateIntegrationCode()
202 " /* These should be generated inline */\n" in PetscFEOpenCLGenerateIntegrationCode()
203 " /* Load quadrature weights */\n" in PetscFEOpenCLGenerateIntegrationCode()
204 " w = weights[qidx];\n" in PetscFEOpenCLGenerateIntegrationCode()
205 " /* Load basis tabulation \\phi_i for this cell */\n" in PetscFEOpenCLGenerateIntegrationCode()
206 " if (tidx < N_bt*N_q) {\n" in PetscFEOpenCLGenerateIntegrationCode()
207 " phi_i[tidx] = Basis[tidx];\n" in PetscFEOpenCLGenerateIntegrationCode()
208 " phiDer_i[tidx] = BasisDerivatives[tidx];\n" in PetscFEOpenCLGenerateIntegrationCode()
209 " }\n\n", in PetscFEOpenCLGenerateIntegrationCode()
213 " for (int batch = 0; batch < N_cb; ++batch) {\n" in PetscFEOpenCLGenerateIntegrationCode()
214 " /* Load geometry */\n" in PetscFEOpenCLGenerateIntegrationCode()
215 … " detJ[tidx] = jacobianDeterminants[Goffset+batch*N_bc+tidx];\n" in PetscFEOpenCLGenerateIntegrationCode()
216 " for (int n = 0; n < dim*dim; ++n) {\n" in PetscFEOpenCLGenerateIntegrationCode()
217 " const int offset = n*N_t;\n" in PetscFEOpenCLGenerateIntegrationCode()
218 … " invJ[offset+tidx] = jacobianInverses[(Goffset+batch*N_bc)*dim*dim+offset+tidx];\n" in PetscFEOpenCLGenerateIntegrationCode()
219 " }\n" in PetscFEOpenCLGenerateIntegrationCode()
220 " /* Load coefficients u_i for this cell */\n" in PetscFEOpenCLGenerateIntegrationCode()
221 " for (int n = 0; n < N_bt; ++n) {\n" in PetscFEOpenCLGenerateIntegrationCode()
222 " const int offset = n*N_t;\n" in PetscFEOpenCLGenerateIntegrationCode()
223 … " u_i[offset+tidx] = coefficients[(Goffset*N_bt)+batch*N_t*N_b+offset+tidx];\n" in PetscFEOpenCLGenerateIntegrationCode()
224 " }\n", in PetscFEOpenCLGenerateIntegrationCode()
228 " /* Load coefficients a_i for this cell */\n" in PetscFEOpenCLGenerateIntegrationCode()
229 … " /* TODO: This should not be N_t here, it should be N_bc*N_comp_aux */\n" in PetscFEOpenCLGenerateIntegrationCode()
230 " a_i[tidx] = coefficientsAux[Goffset+batch*N_t+tidx];\n", in PetscFEOpenCLGenerateIntegrationCode()
235 " barrier(CLK_LOCAL_MEM_FENCE);\n" in PetscFEOpenCLGenerateIntegrationCode()
236 "\n" in PetscFEOpenCLGenerateIntegrationCode()
237 " /* Map coefficients to values at quadrature points */\n" in PetscFEOpenCLGenerateIntegrationCode()
238 " for (int c = 0; c < N_sqc; ++c) {\n" in PetscFEOpenCLGenerateIntegrationCode()
239 " const int cell = c*N_bl*N_b + blqidx;\n" in PetscFEOpenCLGenerateIntegrationCode()
240 … " const int fidx = (cell*N_q + qidx)*N_comp + cidx;\n", in PetscFEOpenCLGenerateIntegrationCode()
242 … " %s u[%d]; //[N_comp]; // $u(x_q)$, Value of the field at $x_q$\n", &count, numeric_st… in PetscFEOpenCLGenerateIntegrationCode()
243 …[%d]; //[N_comp]; // $\\nabla u(x_q)$, Value of the field gradient at $x_q$\n", &count, numeric_st… in PetscFEOpenCLGenerateIntegrationCode()
244 … %s a[%d]; //[1]; // $a(x_q)$, Value of the auxiliary fields at $x_q$\n", &count, numeric_st… in PetscFEOpenCLGenerateIntegrationCode()
245 … //[1]; // $\\nabla a(x_q)$, Value of the auxiliary field gradient at $x_q$\n", &count, numeric_st… in PetscFEOpenCLGenerateIntegrationCode()
247 "\n" in PetscFEOpenCLGenerateIntegrationCode()
248 " for (int comp = 0; comp < N_comp; ++comp) {\n", in PetscFEOpenCLGenerateIntegrationCode()
250 …(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " u[comp] = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
254 …SNPrintfCount(string_tail, end_of_buffer - string_tail, " gradU[comp].x = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
257 …tail, end_of_buffer - string_tail, " gradU[comp].x = 0.0; gradU[comp].y = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
260 … string_tail, " gradU[comp].x = 0.0; gradU[comp].y = 0.0; gradU[comp].z = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
264 PetscCallSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " }\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
265 …llSTR(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " a[0] = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
269 …PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " gradA[0].x = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
272 …(string_tail, end_of_buffer - string_tail, " gradA[0].x = 0.0; gradA[0].y = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
275 …of_buffer - string_tail, " gradA[0].x = 0.0; gradA[0].y = 0.0; gradA[0].z = 0.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
280 " /* Get field and derivatives at this quadrature point */\n" in PetscFEOpenCLGenerateIntegrationCode()
281 " for (int i = 0; i < N_b; ++i) {\n" in PetscFEOpenCLGenerateIntegrationCode()
282 " for (int comp = 0; comp < N_comp; ++comp) {\n" in PetscFEOpenCLGenerateIntegrationCode()
283 " const int b = i*N_comp+comp;\n" in PetscFEOpenCLGenerateIntegrationCode()
284 " const int pidx = qidx*N_bt + b;\n" in PetscFEOpenCLGenerateIntegrationCode()
285 " const int uidx = cell*N_bt + b;\n" in PetscFEOpenCLGenerateIntegrationCode()
286 " %s%d realSpaceDer;\n\n", in PetscFEOpenCLGenerateIntegrationCode()
288 …tring_tail, end_of_buffer - string_tail, " u[comp] += u_i[uidx]*phi_i[pidx];\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
293 …r.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y;\n" in PetscFEOpenCLGenerateIntegrationCode()
294 " gradU[comp].x += u_i[uidx]*realSpaceDer.x;\n" in PetscFEOpenCLGenerateIntegrationCode()
295 …r.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y;\n" in PetscFEOpenCLGenerateIntegrationCode()
296 " gradU[comp].y += u_i[uidx]*realSpaceDer.y;\n", in PetscFEOpenCLGenerateIntegrationCode()
301 …].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+0]*phiDer_i[pidx].z;\n" in PetscFEOpenCLGenerateIntegrationCode()
302 " gradU[comp].x += u_i[uidx]*realSpaceDer.x;\n" in PetscFEOpenCLGenerateIntegrationCode()
303 …].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+1]*phiDer_i[pidx].z;\n" in PetscFEOpenCLGenerateIntegrationCode()
304 " gradU[comp].y += u_i[uidx]*realSpaceDer.y;\n" in PetscFEOpenCLGenerateIntegrationCode()
305 …].x + invJ[cell*dim*dim+1*dim+2]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+2]*phiDer_i[pidx].z;\n" in PetscFEOpenCLGenerateIntegrationCode()
306 " gradU[comp].z += u_i[uidx]*realSpaceDer.z;\n", in PetscFEOpenCLGenerateIntegrationCode()
312 " }\n" in PetscFEOpenCLGenerateIntegrationCode()
313 " }\n", in PetscFEOpenCLGenerateIntegrationCode()
315 …SNPrintfCount(string_tail, end_of_buffer - string_tail, " a[0] += a_i[cell];\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
317 …g_tail, end_of_buffer - string_tail, " /* Process values at quadrature points */\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
320 …(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_0[fidx] = 4.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
322 …fCount(string_tail, end_of_buffer - string_tail, " f_1[fidx] = a[0]*gradU[cidx];\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
323 …PrintfCount(string_tail, end_of_buffer - string_tail, " f_1[fidx] = gradU[cidx];\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
327 …(PetscSNPrintfCount(string_tail, end_of_buffer - string_tail, " f_0[fidx] = 4.0;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
332 " switch (cidx) {\n" in PetscFEOpenCLGenerateIntegrationCode()
333 " case 0:\n" in PetscFEOpenCLGenerateIntegrationCode()
334 … " f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[0].x + gradU[0].x);\n" in PetscFEOpenCLGenerateIntegrationCode()
335 … " f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[0].y + gradU[1].x);\n" in PetscFEOpenCLGenerateIntegrationCode()
336 " break;\n" in PetscFEOpenCLGenerateIntegrationCode()
337 " case 1:\n" in PetscFEOpenCLGenerateIntegrationCode()
338 … " f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[1].x + gradU[0].y);\n" in PetscFEOpenCLGenerateIntegrationCode()
339 … " f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y) + mu*(gradU[1].y + gradU[1].y);\n" in PetscFEOpenCLGenerateIntegrationCode()
340 " }\n", in PetscFEOpenCLGenerateIntegrationCode()
345 " switch (cidx) {\n" in PetscFEOpenCLGenerateIntegrationCode()
346 " case 0:\n" in PetscFEOpenCLGenerateIntegrationCode()
347 … f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[0].x + gradU[0].x);\n" in PetscFEOpenCLGenerateIntegrationCode()
348 … f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[0].y + gradU[1].x);\n" in PetscFEOpenCLGenerateIntegrationCode()
349 … f_1[fidx].z = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[0].z + gradU[2].x);\n" in PetscFEOpenCLGenerateIntegrationCode()
350 " break;\n" in PetscFEOpenCLGenerateIntegrationCode()
351 " case 1:\n" in PetscFEOpenCLGenerateIntegrationCode()
352 … f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[1].x + gradU[0].y);\n" in PetscFEOpenCLGenerateIntegrationCode()
353 … f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[1].y + gradU[1].y);\n" in PetscFEOpenCLGenerateIntegrationCode()
354 … f_1[fidx].z = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[1].y + gradU[2].y);\n" in PetscFEOpenCLGenerateIntegrationCode()
355 " break;\n" in PetscFEOpenCLGenerateIntegrationCode()
356 " case 2:\n" in PetscFEOpenCLGenerateIntegrationCode()
357 … f_1[fidx].x = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[2].x + gradU[0].z);\n" in PetscFEOpenCLGenerateIntegrationCode()
358 … f_1[fidx].y = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[2].y + gradU[1].z);\n" in PetscFEOpenCLGenerateIntegrationCode()
359 … f_1[fidx].z = lambda*(gradU[0].x + gradU[1].y + gradU[2].z) + mu*(gradU[2].y + gradU[2].z);\n" in PetscFEOpenCLGenerateIntegrationCode()
360 " }\n", in PetscFEOpenCLGenerateIntegrationCode()
369 …intfCount(string_tail, end_of_buffer - string_tail, " f_0[fidx] *= detJ[cell]*w;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
373 …tfCount(string_tail, end_of_buffer - string_tail, " f_1[fidx].x *= detJ[cell]*w;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
376 …uffer - string_tail, " f_1[fidx].x *= detJ[cell]*w; f_1[fidx].y *= detJ[cell]*w;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
379 …_1[fidx].x *= detJ[cell]*w; f_1[fidx].y *= detJ[cell]*w; f_1[fidx].z *= detJ[cell]*w;\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
385 " }\n\n" in PetscFEOpenCLGenerateIntegrationCode()
386 " /* ==== TRANSPOSE THREADS ==== */\n" in PetscFEOpenCLGenerateIntegrationCode()
387 " barrier(CLK_LOCAL_MEM_FENCE);\n\n", in PetscFEOpenCLGenerateIntegrationCode()
391 " /* Map values at quadrature points to coefficients */\n" in PetscFEOpenCLGenerateIntegrationCode()
392 " for (int c = 0; c < N_sbc; ++c) {\n" in PetscFEOpenCLGenerateIntegrationCode()
393 … " const int cell = c*N_bl*N_q + blbidx; /* Cell number in batch */\n" in PetscFEOpenCLGenerateIntegrationCode()
394 "\n" in PetscFEOpenCLGenerateIntegrationCode()
395 " e_i = 0.0;\n" in PetscFEOpenCLGenerateIntegrationCode()
396 " for (int q = 0; q < N_q; ++q) {\n" in PetscFEOpenCLGenerateIntegrationCode()
397 " const int pidx = q*N_bt + bidx;\n" in PetscFEOpenCLGenerateIntegrationCode()
398 " const int fidx = (cell*N_q + q)*N_comp + cidx;\n" in PetscFEOpenCLGenerateIntegrationCode()
399 " %s%d realSpaceDer;\n\n", in PetscFEOpenCLGenerateIntegrationCode()
402 …ount(string_tail, end_of_buffer - string_tail, " e_i += phi_i[pidx]*f_0[fidx];\n", &count)); in PetscFEOpenCLGenerateIntegrationCode()
407 …r.x = invJ[cell*dim*dim+0*dim+0]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y;\n" in PetscFEOpenCLGenerateIntegrationCode()
408 " e_i += realSpaceDer.x*f_1[fidx].x;\n" in PetscFEOpenCLGenerateIntegrationCode()
409 …r.y = invJ[cell*dim*dim+0*dim+1]*phiDer_i[pidx].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y;\n" in PetscFEOpenCLGenerateIntegrationCode()
410 " e_i += realSpaceDer.y*f_1[fidx].y;\n", in PetscFEOpenCLGenerateIntegrationCode()
415 …].x + invJ[cell*dim*dim+1*dim+0]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+0]*phiDer_i[pidx].z;\n" in PetscFEOpenCLGenerateIntegrationCode()
416 " e_i += realSpaceDer.x*f_1[fidx].x;\n" in PetscFEOpenCLGenerateIntegrationCode()
417 …].x + invJ[cell*dim*dim+1*dim+1]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+1]*phiDer_i[pidx].z;\n" in PetscFEOpenCLGenerateIntegrationCode()
418 " e_i += realSpaceDer.y*f_1[fidx].y;\n" in PetscFEOpenCLGenerateIntegrationCode()
419 …].x + invJ[cell*dim*dim+1*dim+2]*phiDer_i[pidx].y + invJ[cell*dim*dim+2*dim+2]*phiDer_i[pidx].z;\n" in PetscFEOpenCLGenerateIntegrationCode()
420 " e_i += realSpaceDer.z*f_1[fidx].z;\n", in PetscFEOpenCLGenerateIntegrationCode()
426 " }\n" in PetscFEOpenCLGenerateIntegrationCode()
427 " /* Write element vector for N_{cbc} cells at a time */\n" in PetscFEOpenCLGenerateIntegrationCode()
428 … " elemVec[(Goffset + batch*N_bc + c*N_bl*N_q)*N_bt + tidx] = e_i;\n" in PetscFEOpenCLGenerateIntegrationCode()
429 " }\n" in PetscFEOpenCLGenerateIntegrationCode()
430 " /* ==== Could do one write per batch ==== */\n" in PetscFEOpenCLGenerateIntegrationCode()
431 " }\n" in PetscFEOpenCLGenerateIntegrationCode()
432 " return;\n" in PetscFEOpenCLGenerateIntegrationCode()
433 "}\n", in PetscFEOpenCLGenerateIntegrationCode()
454 …all(PetscPrintf(PetscObjectComm((PetscObject)fem), "OpenCL FE Integration Kernel:\n%s\n", buffer)); in PetscFEOpenCLGetIntegrationKernel()
461 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Build failed! Log:\n %s", errMsg); in PetscFEOpenCLGetIntegrationKernel()
468 static PetscErrorCode PetscFEOpenCLCalculateGrid(PetscFE fem, PetscInt N, PetscInt blockSize, size_… in PetscFEOpenCLCalculateGrid() argument
470 const PetscInt Nblocks = N / blockSize; in PetscFEOpenCLCalculateGrid()
473 …PetscCheck(!(N % blockSize), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid block size %d for %d ele… in PetscFEOpenCLCalculateGrid()
480 …IZ, "Could not find partition for %" PetscInt_FMT " with block size %" PetscInt_FMT, N, blockSize); in PetscFEOpenCLCalculateGrid()
575 …PetscCall(PetscInfo(fem, "GPU layout grid(%zu,%zu,%zu) block(%zu,%zu,%zu) with %d batches\n", x, y… in PetscFEIntegrateResidual_OpenCL()
576 PetscCall(PetscInfo(fem, " N_t: %d, N_cb: %d\n", N_t, N_cb)); in PetscFEIntegrateResidual_OpenCL()