1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApplyNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 20 const CeedInt comp_stride, CeedInt start, CeedInt stop, bool use_orients, CeedVector u, 21 CeedVector v, CeedRequest *request) { 22 Ceed ceed; 23 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 24 CeedElemRestriction_Ref *impl; 25 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 26 const CeedScalar *uu; 27 CeedScalar *vv; 28 CeedInt num_elem, elem_size, v_offset; 29 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 30 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 31 v_offset = start * blk_size * elem_size * num_comp; 32 CeedRestrictionType rstr_type; 33 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 34 35 // Restriction from L-vector to E-vector 36 // Perform: v = r * u 37 // vv has shape [elem_size, num_comp, num_elem], row-major 38 // uu has shape [nnodes, num_comp] 39 // Overwrite for notranspose mode 40 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 41 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 42 switch (rstr_type) { 43 case CEED_RESTRICTION_STRIDED: { 44 // No offsets provided, Identity Restriction 45 bool has_backend_strides; 46 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47 if (has_backend_strides) { 48 // CPU backend strides are {1, elem_size, elem_size*num_comp} 49 // This if branch is left separate to allow better inlining 50 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 51 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 52 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 53 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 54 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 55 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 56 } 57 } 58 } 59 } 60 } else { 61 // User provided strides 62 CeedInt strides[3]; 63 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 64 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 65 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 66 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 67 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 68 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 69 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 70 } 71 } 72 } 73 } 74 } 75 } break; 76 case CEED_RESTRICTION_DEFAULT: { 77 // Default restriction with offsets 78 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 79 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 80 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 81 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 82 } 83 } 84 } 85 } break; 86 case CEED_RESTRICTION_ORIENTED: { 87 // Restriction with orientations 88 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 89 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 90 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 91 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = 92 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (use_orients && impl->orients[i + e * elem_size] ? -1.0 : 1.0); 93 } 94 } 95 } 96 } break; 97 case CEED_RESTRICTION_CURL_ORIENTED: { 98 // Restriction with tridiagonal transformation 99 CeedCheck(elem_size > 1, ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_CURL_ORIENTED should always have elem_size > 1"); 100 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 101 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 102 CeedInt n = 0; 103 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 104 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 105 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 106 (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 107 : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 108 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 109 (use_orients ? impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size] 110 : abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size])); 111 } 112 for (n = 1; n < elem_size - 1; n++) { 113 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 114 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 115 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 116 (use_orients ? impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] 117 : abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size])) + 118 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 119 (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 120 : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 121 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 122 (use_orients ? impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size] 123 : abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size])); 124 } 125 } 126 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 127 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 128 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 129 (use_orients ? impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] 130 : abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size])) + 131 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 132 (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 133 : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])); 134 } 135 } 136 } 137 } break; 138 } 139 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 140 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 141 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 142 return CEED_ERROR_SUCCESS; 143 } 144 145 //------------------------------------------------------------------------------ 146 // Core ElemRestriction Apply Transpose Code 147 //------------------------------------------------------------------------------ 148 static inline int CeedElemRestrictionApplyTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 149 const CeedInt comp_stride, CeedInt start, CeedInt stop, bool use_orients, CeedVector u, 150 CeedVector v, CeedRequest *request) { 151 Ceed ceed; 152 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 153 CeedElemRestriction_Ref *impl; 154 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 155 const CeedScalar *uu; 156 CeedScalar *vv; 157 CeedInt num_elem, elem_size, v_offset; 158 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 159 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 160 v_offset = start * blk_size * elem_size * num_comp; 161 CeedRestrictionType rstr_type; 162 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 163 164 // Restriction from E-vector to L-vector 165 // Performing v += r^T * u 166 // uu has shape [elem_size, num_comp, num_elem], row-major 167 // vv has shape [nnodes, num_comp] 168 // Sum into for transpose mode 169 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 170 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 171 switch (rstr_type) { 172 case CEED_RESTRICTION_STRIDED: { 173 // No offsets provided, Identity Restriction 174 bool has_backend_strides; 175 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 176 if (has_backend_strides) { 177 // CPU backend strides are {1, elem_size, elem_size*num_comp} 178 // This if brach is left separate to allow better inlining 179 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 180 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 181 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 182 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 183 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 184 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 185 } 186 } 187 } 188 } 189 } else { 190 // User provided strides 191 CeedInt strides[3]; 192 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 193 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 194 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 195 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 196 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 197 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 198 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 199 } 200 } 201 } 202 } 203 } 204 } break; 205 case CEED_RESTRICTION_DEFAULT: { 206 // Default restriction with offsets 207 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 208 for (CeedInt k = 0; k < num_comp; k++) { 209 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 210 // Iteration bound set to discard padding elements 211 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 212 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 213 } 214 } 215 } 216 } 217 } break; 218 case CEED_RESTRICTION_ORIENTED: { 219 // Restriction with orientations 220 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 221 for (CeedInt k = 0; k < num_comp; k++) { 222 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 223 // Iteration bound set to discard padding elements 224 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 225 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 226 uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (use_orients && impl->orients[j + e * elem_size] ? -1.0 : 1.0); 227 } 228 } 229 } 230 } 231 } break; 232 case CEED_RESTRICTION_CURL_ORIENTED: { 233 // Restriction with tridiagonal transformation 234 CeedCheck(elem_size > 1, ceed, CEED_ERROR_BACKEND, "CEED_RESTRICTION_CURL_ORIENTED should always have elem_size > 1"); 235 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 236 for (CeedInt k = 0; k < num_comp; k++) { 237 // Iteration bound set to discard padding elements 238 CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 239 CeedScalar vvn[blk_size]; 240 for (CeedInt j = 0; j < blk_end; j++) { 241 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 242 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 243 (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 244 : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 245 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 246 (use_orients ? impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size] 247 : abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size])); 248 } 249 for (n = 1; n < elem_size - 1; n++) { 250 for (CeedInt j = 0; j < blk_end; j++) { 251 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 252 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 253 (use_orients ? impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] 254 : abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size])) + 255 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 256 (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 257 : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])) + 258 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 259 (use_orients ? impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size] 260 : abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size])); 261 } 262 } 263 for (CeedInt j = 0; j < blk_end; j++) { 264 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 265 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 266 (use_orients ? impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] 267 : abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size])) + 268 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 269 (use_orients ? impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] 270 : abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size])); 271 } 272 } 273 } 274 } break; 275 } 276 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 277 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 278 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 279 return CEED_ERROR_SUCCESS; 280 } 281 282 //------------------------------------------------------------------------------ 283 // ElemRestriction Apply - Common Sizes 284 //------------------------------------------------------------------------------ 285 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 286 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 287 CeedRequest *request) { 288 if (t_mode == CEED_TRANSPOSE) { 289 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, u, v, request); 290 } else { 291 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, u, v, request); 292 } 293 } 294 295 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 296 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 297 CeedRequest *request) { 298 if (t_mode == CEED_TRANSPOSE) { 299 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 1, 1, start, stop, use_orients, u, v, request); 300 } else { 301 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 1, 1, start, stop, use_orients, u, v, request); 302 } 303 } 304 305 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 306 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 307 CeedRequest *request) { 308 if (t_mode == CEED_TRANSPOSE) { 309 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, u, v, request); 310 } else { 311 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, u, v, request); 312 } 313 } 314 315 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 316 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 317 CeedRequest *request) { 318 if (t_mode == CEED_TRANSPOSE) { 319 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 1, 8, 1, start, stop, use_orients, u, v, request); 320 } else { 321 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 1, 8, 1, start, stop, use_orients, u, v, request); 322 } 323 } 324 325 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 326 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 327 CeedRequest *request) { 328 if (t_mode == CEED_TRANSPOSE) { 329 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, u, v, request); 330 } else { 331 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, u, v, request); 332 } 333 } 334 335 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 336 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 337 CeedRequest *request) { 338 if (t_mode == CEED_TRANSPOSE) { 339 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 1, 1, start, stop, use_orients, u, v, request); 340 } else { 341 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 1, 1, start, stop, use_orients, u, v, request); 342 } 343 } 344 345 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 346 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 347 CeedRequest *request) { 348 if (t_mode == CEED_TRANSPOSE) { 349 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, u, v, request); 350 } else { 351 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, u, v, request); 352 } 353 } 354 355 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 356 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 357 CeedRequest *request) { 358 if (t_mode == CEED_TRANSPOSE) { 359 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 3, 8, 1, start, stop, use_orients, u, v, request); 360 } else { 361 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 3, 8, 1, start, stop, use_orients, u, v, request); 362 } 363 } 364 365 // LCOV_EXCL_START 366 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 367 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 368 CeedRequest *request) { 369 if (t_mode == CEED_TRANSPOSE) { 370 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, u, v, request); 371 } else { 372 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, u, v, request); 373 } 374 } 375 // LCOV_EXCL_STOP 376 377 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 378 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 379 CeedRequest *request) { 380 if (t_mode == CEED_TRANSPOSE) { 381 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 1, 1, start, stop, use_orients, u, v, request); 382 } else { 383 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 1, 1, start, stop, use_orients, u, v, request); 384 } 385 } 386 387 // LCOV_EXCL_START 388 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 389 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 390 CeedRequest *request) { 391 if (t_mode == CEED_TRANSPOSE) { 392 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, u, v, request); 393 } else { 394 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, u, v, request); 395 } 396 } 397 // LCOV_EXCL_STOP 398 399 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 400 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 401 CeedRequest *request) { 402 if (t_mode == CEED_TRANSPOSE) { 403 return CeedElemRestrictionApplyTranspose_Ref_Core(r, 5, 8, 1, start, stop, use_orients, u, v, request); 404 } else { 405 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, 5, 8, 1, start, stop, use_orients, u, v, request); 406 } 407 } 408 409 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 410 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, 411 CeedVector v, CeedRequest *request) { 412 if (t_mode == CEED_TRANSPOSE) { 413 return CeedElemRestrictionApplyTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, use_orients, u, v, request); 414 } else { 415 return CeedElemRestrictionApplyNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, use_orients, u, v, request); 416 } 417 } 418 419 //------------------------------------------------------------------------------ 420 // ElemRestriction Apply 421 //------------------------------------------------------------------------------ 422 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 423 CeedInt num_blk, blk_size, num_comp, comp_stride; 424 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 425 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 426 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 427 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 428 CeedElemRestriction_Ref *impl; 429 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 430 431 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request); 432 } 433 434 //------------------------------------------------------------------------------ 435 // ElemRestriction Apply Unsigned 436 //------------------------------------------------------------------------------ 437 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 438 CeedInt num_blk, blk_size, num_comp, comp_stride; 439 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 440 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 441 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 442 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 443 CeedElemRestriction_Ref *impl; 444 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 445 446 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request); 447 } 448 449 //------------------------------------------------------------------------------ 450 // ElemRestriction Apply Block 451 //------------------------------------------------------------------------------ 452 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 453 CeedRequest *request) { 454 CeedInt blk_size, num_comp, comp_stride; 455 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 456 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 457 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 458 CeedElemRestriction_Ref *impl; 459 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 460 461 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request); 462 } 463 464 //------------------------------------------------------------------------------ 465 // ElemRestriction Get Offsets 466 //------------------------------------------------------------------------------ 467 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 468 CeedElemRestriction_Ref *impl; 469 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 470 Ceed ceed; 471 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 472 473 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 474 475 *offsets = impl->offsets; 476 return CEED_ERROR_SUCCESS; 477 } 478 479 //------------------------------------------------------------------------------ 480 // ElemRestriction Get Orientations 481 //------------------------------------------------------------------------------ 482 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 483 CeedElemRestriction_Ref *impl; 484 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 485 Ceed ceed; 486 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 487 488 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 489 490 *orients = impl->orients; 491 return CEED_ERROR_SUCCESS; 492 } 493 494 //------------------------------------------------------------------------------ 495 // ElemRestriction Get Curl-Conforming Orientations 496 //------------------------------------------------------------------------------ 497 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 498 CeedElemRestriction_Ref *impl; 499 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 500 Ceed ceed; 501 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 502 503 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 504 505 *curl_orients = impl->curl_orients; 506 return CEED_ERROR_SUCCESS; 507 } 508 509 //------------------------------------------------------------------------------ 510 // ElemRestriction Destroy 511 //------------------------------------------------------------------------------ 512 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 513 CeedElemRestriction_Ref *impl; 514 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 515 516 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 517 CeedCallBackend(CeedFree(&impl->orients_allocated)); 518 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 519 CeedCallBackend(CeedFree(&impl)); 520 return CEED_ERROR_SUCCESS; 521 } 522 523 //------------------------------------------------------------------------------ 524 // ElemRestriction Create 525 //------------------------------------------------------------------------------ 526 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 527 const CeedInt8 *curl_orients, CeedElemRestriction r) { 528 CeedElemRestriction_Ref *impl; 529 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 530 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 531 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 532 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 533 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 534 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 535 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 536 Ceed ceed; 537 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 538 539 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 540 CeedCallBackend(CeedCalloc(1, &impl)); 541 542 // Offsets data 543 CeedRestrictionType rstr_type; 544 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 545 if (rstr_type != CEED_RESTRICTION_STRIDED) { 546 // Check indices for ref or memcheck backends 547 Ceed parent_ceed = ceed, curr_ceed = NULL; 548 while (parent_ceed != curr_ceed) { 549 curr_ceed = parent_ceed; 550 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 551 } 552 const char *resource; 553 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 554 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 555 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 556 CeedSize l_size; 557 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 558 559 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 560 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 561 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 562 } 563 } 564 565 // Copy data 566 switch (copy_mode) { 567 case CEED_COPY_VALUES: 568 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 569 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 570 impl->offsets = impl->offsets_allocated; 571 break; 572 case CEED_OWN_POINTER: 573 impl->offsets_allocated = (CeedInt *)offsets; 574 impl->offsets = impl->offsets_allocated; 575 break; 576 case CEED_USE_POINTER: 577 impl->offsets = offsets; 578 } 579 580 // Orientation data 581 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 582 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 583 switch (copy_mode) { 584 case CEED_COPY_VALUES: 585 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 586 memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 587 impl->orients = impl->orients_allocated; 588 break; 589 case CEED_OWN_POINTER: 590 impl->orients_allocated = (bool *)orients; 591 impl->orients = impl->orients_allocated; 592 break; 593 case CEED_USE_POINTER: 594 impl->orients = orients; 595 } 596 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 597 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 598 switch (copy_mode) { 599 case CEED_COPY_VALUES: 600 CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 601 memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 602 impl->curl_orients = impl->curl_orients_allocated; 603 break; 604 case CEED_OWN_POINTER: 605 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 606 impl->curl_orients = impl->curl_orients_allocated; 607 break; 608 case CEED_USE_POINTER: 609 impl->curl_orients = curl_orients; 610 } 611 } 612 } 613 614 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 615 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 616 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 617 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 618 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 619 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 620 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 621 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 622 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 623 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 624 625 // Set apply function based upon num_comp, blk_size, and comp_stride 626 CeedInt idx = -1; 627 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 628 switch (idx) { 629 case 110: 630 impl->Apply = CeedElemRestrictionApply_Ref_110; 631 break; 632 case 111: 633 impl->Apply = CeedElemRestrictionApply_Ref_111; 634 break; 635 case 180: 636 impl->Apply = CeedElemRestrictionApply_Ref_180; 637 break; 638 case 181: 639 impl->Apply = CeedElemRestrictionApply_Ref_181; 640 break; 641 case 310: 642 impl->Apply = CeedElemRestrictionApply_Ref_310; 643 break; 644 case 311: 645 impl->Apply = CeedElemRestrictionApply_Ref_311; 646 break; 647 case 380: 648 impl->Apply = CeedElemRestrictionApply_Ref_380; 649 break; 650 case 381: 651 impl->Apply = CeedElemRestrictionApply_Ref_381; 652 break; 653 // LCOV_EXCL_START 654 case 510: 655 impl->Apply = CeedElemRestrictionApply_Ref_510; 656 break; 657 // LCOV_EXCL_STOP 658 case 511: 659 impl->Apply = CeedElemRestrictionApply_Ref_511; 660 break; 661 // LCOV_EXCL_START 662 case 580: 663 impl->Apply = CeedElemRestrictionApply_Ref_580; 664 break; 665 // LCOV_EXCL_STOP 666 case 581: 667 impl->Apply = CeedElemRestrictionApply_Ref_581; 668 break; 669 default: 670 impl->Apply = CeedElemRestrictionApply_Ref_Core; 671 break; 672 } 673 674 return CEED_ERROR_SUCCESS; 675 } 676 677 //------------------------------------------------------------------------------ 678