1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 20 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, 21 CeedVector v, CeedRequest *request) { 22 CeedElemRestriction_Ref *impl; 23 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 24 const CeedScalar *uu; 25 CeedScalar *vv; 26 CeedInt num_elem, elem_size, v_offset; 27 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 28 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 29 v_offset = start * blk_size * elem_size * num_comp; 30 CeedRestrictionType rstr_type; 31 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 32 33 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 34 if (t_mode == CEED_TRANSPOSE) { 35 // Sum into for transpose mode, e-vec to l-vec 36 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 37 } else { 38 // Overwrite for notranspose mode, l-vec to e-vec 39 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 40 } 41 // Restriction from L-vector to E-vector 42 // Perform: v = r * u 43 // vv has shape [elem_size, num_comp, num_elem], row-major 44 // uu has shape [nnodes, num_comp] 45 if (t_mode == CEED_NOTRANSPOSE) { 46 switch (rstr_type) { 47 case CEED_RESTRICTION_STRIDED: { 48 // No offsets provided, Identity Restriction 49 bool has_backend_strides; 50 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 51 if (has_backend_strides) { 52 // CPU backend strides are {1, elem_size, elem_size*num_comp} 53 // This if branch is left separate to allow better inlining 54 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 55 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 56 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 57 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 58 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 59 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 60 } 61 } 62 } 63 } 64 } else { 65 // User provided strides 66 CeedInt strides[3]; 67 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 68 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 69 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 70 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 71 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 72 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 73 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 74 } 75 } 76 } 77 } 78 } 79 } break; 80 case CEED_RESTRICTION_DEFAULT: 81 // Default restriction with offsets 82 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 83 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 84 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 85 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = uu[impl->offsets[i + elem_size * e] + k * comp_stride]; 86 } 87 } 88 } 89 break; 90 case CEED_RESTRICTION_ORIENTED: 91 // Restriction with orientations 92 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 93 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 94 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 95 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 96 uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (use_orients && impl->orients[i + elem_size * e] ? -1.0 : 1.0); 97 } 98 } 99 } 100 break; 101 case CEED_RESTRICTION_CURL_ORIENTED: 102 // Restriction with tridiagonal transformation 103 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 104 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 105 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 106 CeedInt ii = i % elem_size; 107 if (use_orients) { 108 if (ii == 0) { 109 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 110 uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 1 + 3 * elem_size * e] + 111 uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 2 + 3 * elem_size * e]; 112 } else if (ii == elem_size - 1) { 113 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 114 uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 0 + 3 * elem_size * e] + 115 uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 1 + 3 * elem_size * e]; 116 } else { 117 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 118 uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 0 + 3 * elem_size * e] + 119 uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 1 + 3 * elem_size * e] + 120 uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * impl->curl_orients[3 * i + 2 + 3 * elem_size * e]; 121 } 122 } else { 123 if (ii == 0) { 124 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 125 uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 1 + 3 * elem_size * e]) + 126 uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 2 + 3 * elem_size * e]); 127 } else if (ii == elem_size - 1) { 128 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 129 uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 0 + 3 * elem_size * e]) + 130 uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 1 + 3 * elem_size * e]); 131 } else { 132 vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 133 uu[impl->offsets[i - 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 0 + 3 * elem_size * e]) + 134 uu[impl->offsets[i + 0 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 1 + 3 * elem_size * e]) + 135 uu[impl->offsets[i + 1 + elem_size * e] + k * comp_stride] * abs(impl->curl_orients[3 * i + 2 + 3 * elem_size * e]); 136 } 137 } 138 } 139 } 140 } 141 break; 142 } 143 } else { 144 // Restriction from E-vector to L-vector 145 // Performing v += r^T * u 146 // uu has shape [elem_size, num_comp, num_elem] 147 // vv has shape [nnodes, num_comp] 148 switch (rstr_type) { 149 case CEED_RESTRICTION_STRIDED: { 150 // No offsets provided, Identity Restriction 151 bool has_backend_strides; 152 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 153 if (has_backend_strides) { 154 // CPU backend strides are {1, elem_size, elem_size*num_comp} 155 // This if brach is left separate to allow better inlining 156 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 157 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 158 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 159 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 160 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 161 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 162 } 163 } 164 } 165 } 166 } else { 167 // User provided strides 168 CeedInt strides[3]; 169 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 170 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 171 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 172 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 173 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 174 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 175 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 176 } 177 } 178 } 179 } 180 } 181 } break; 182 case CEED_RESTRICTION_DEFAULT: 183 // Default restriction with offsets 184 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 185 for (CeedInt k = 0; k < num_comp; k++) { 186 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 187 // Iteration bound set to discard padding elements 188 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 189 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset]; 190 } 191 } 192 } 193 } 194 break; 195 case CEED_RESTRICTION_ORIENTED: 196 // Restriction with orientations 197 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 198 for (CeedInt k = 0; k < num_comp; k++) { 199 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 200 // Iteration bound set to discard padding elements 201 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 202 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 203 uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (use_orients && impl->orients[j + e * elem_size] ? -1.0 : 1.0); 204 } 205 } 206 } 207 } 208 break; 209 case CEED_RESTRICTION_CURL_ORIENTED: 210 // Restriction with tridiagonal transformation 211 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 212 for (CeedInt k = 0; k < num_comp; k++) { 213 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 214 // Iteration bound set to discard padding elements 215 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 216 CeedInt jj = j % elem_size; 217 if (use_orients) { 218 if (jj == 0) { 219 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 220 uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size] + 221 uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] * impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size]; 222 } else if (jj == elem_size - 1) { 223 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 224 uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] * impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size] + 225 uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size]; 226 } else { 227 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 228 uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] * impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size] + 229 uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size] + 230 uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] * impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size]; 231 } 232 } else { 233 if (jj == 0) { 234 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * 235 abs(impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size]) + 236 uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] * 237 abs(impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size]); 238 } else if (jj == elem_size - 1) { 239 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] * 240 abs(impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size]) + 241 uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * 242 abs(impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size]); 243 } else { 244 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - 1 - v_offset] * 245 abs(impl->curl_orients[(j - 1) * 3 + 2 + e * 3 * elem_size]) + 246 uu[elem_size * (k * blk_size + num_comp * e) + j + 0 - v_offset] * 247 abs(impl->curl_orients[(j + 0) * 3 + 1 + e * 3 * elem_size]) + 248 uu[elem_size * (k * blk_size + num_comp * e) + j + 1 - v_offset] * 249 abs(impl->curl_orients[(j + 1) * 3 + 0 + e * 3 * elem_size]); 250 } 251 } 252 } 253 } 254 } 255 } 256 break; 257 } 258 } 259 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 260 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 261 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 262 return CEED_ERROR_SUCCESS; 263 } 264 265 //------------------------------------------------------------------------------ 266 // ElemRestriction Apply - Common Sizes 267 //------------------------------------------------------------------------------ 268 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 269 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 270 CeedRequest *request) { 271 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request); 272 } 273 274 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 275 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 276 CeedRequest *request) { 277 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, use_orients, t_mode, u, v, request); 278 } 279 280 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 281 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 282 CeedRequest *request) { 283 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request); 284 } 285 286 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 287 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 288 CeedRequest *request) { 289 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, use_orients, t_mode, u, v, request); 290 } 291 292 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 293 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 294 CeedRequest *request) { 295 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request); 296 } 297 298 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 299 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 300 CeedRequest *request) { 301 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, use_orients, t_mode, u, v, request); 302 } 303 304 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 305 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 306 CeedRequest *request) { 307 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request); 308 } 309 310 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 311 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 312 CeedRequest *request) { 313 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, use_orients, t_mode, u, v, request); 314 } 315 316 // LCOV_EXCL_START 317 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 318 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 319 CeedRequest *request) { 320 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orients, t_mode, u, v, request); 321 } 322 // LCOV_EXCL_STOP 323 324 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 325 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 326 CeedRequest *request) { 327 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, use_orients, t_mode, u, v, request); 328 } 329 330 // LCOV_EXCL_START 331 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 332 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 333 CeedRequest *request) { 334 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orients, t_mode, u, v, request); 335 } 336 // LCOV_EXCL_STOP 337 338 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 339 CeedInt start, CeedInt stop, bool use_orients, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 340 CeedRequest *request) { 341 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, use_orients, t_mode, u, v, request); 342 } 343 344 //------------------------------------------------------------------------------ 345 // ElemRestriction Apply 346 //------------------------------------------------------------------------------ 347 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 348 CeedInt num_blk, blk_size, num_comp, comp_stride; 349 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 350 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 351 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 352 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 353 CeedElemRestriction_Ref *impl; 354 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 355 356 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request); 357 } 358 359 //------------------------------------------------------------------------------ 360 // ElemRestriction Apply Unsigned 361 //------------------------------------------------------------------------------ 362 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 363 CeedInt num_blk, blk_size, num_comp, comp_stride; 364 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 365 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 366 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 367 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 368 CeedElemRestriction_Ref *impl; 369 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 370 371 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request); 372 } 373 374 //------------------------------------------------------------------------------ 375 // ElemRestriction Apply Block 376 //------------------------------------------------------------------------------ 377 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 378 CeedRequest *request) { 379 CeedInt blk_size, num_comp, comp_stride; 380 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 381 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 382 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 383 CeedElemRestriction_Ref *impl; 384 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 385 386 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request); 387 } 388 389 //------------------------------------------------------------------------------ 390 // ElemRestriction Get Offsets 391 //------------------------------------------------------------------------------ 392 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 393 CeedElemRestriction_Ref *impl; 394 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 395 Ceed ceed; 396 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 397 398 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 399 400 *offsets = impl->offsets; 401 return CEED_ERROR_SUCCESS; 402 } 403 404 //------------------------------------------------------------------------------ 405 // ElemRestriction Get Orientations 406 //------------------------------------------------------------------------------ 407 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 408 CeedElemRestriction_Ref *impl; 409 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 410 Ceed ceed; 411 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 412 413 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 414 415 *orients = impl->orients; 416 return CEED_ERROR_SUCCESS; 417 } 418 419 //------------------------------------------------------------------------------ 420 // ElemRestriction Get Curl-Conforming Orientations 421 //------------------------------------------------------------------------------ 422 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **curl_orients) { 423 CeedElemRestriction_Ref *impl; 424 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 425 Ceed ceed; 426 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 427 428 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 429 430 *curl_orients = impl->curl_orients; 431 return CEED_ERROR_SUCCESS; 432 } 433 434 //------------------------------------------------------------------------------ 435 // ElemRestriction Destroy 436 //------------------------------------------------------------------------------ 437 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 438 CeedElemRestriction_Ref *impl; 439 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 440 441 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 442 CeedCallBackend(CeedFree(&impl->orients_allocated)); 443 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 444 CeedCallBackend(CeedFree(&impl)); 445 return CEED_ERROR_SUCCESS; 446 } 447 448 //------------------------------------------------------------------------------ 449 // ElemRestriction Create 450 //------------------------------------------------------------------------------ 451 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 452 const CeedInt *curl_orients, CeedElemRestriction r) { 453 CeedElemRestriction_Ref *impl; 454 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 455 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 456 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 457 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 458 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 459 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 460 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 461 Ceed ceed; 462 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 463 464 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 465 CeedCallBackend(CeedCalloc(1, &impl)); 466 467 // Offsets data 468 CeedRestrictionType rstr_type; 469 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 470 if (rstr_type != CEED_RESTRICTION_STRIDED) { 471 // Check indices for ref or memcheck backends 472 Ceed parent_ceed = ceed, curr_ceed = NULL; 473 while (parent_ceed != curr_ceed) { 474 curr_ceed = parent_ceed; 475 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 476 } 477 const char *resource; 478 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 479 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 480 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 481 CeedSize l_size; 482 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 483 484 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 485 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 486 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 487 } 488 } 489 490 // Copy data 491 switch (copy_mode) { 492 case CEED_COPY_VALUES: 493 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 494 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 495 impl->offsets = impl->offsets_allocated; 496 break; 497 case CEED_OWN_POINTER: 498 impl->offsets_allocated = (CeedInt *)offsets; 499 impl->offsets = impl->offsets_allocated; 500 break; 501 case CEED_USE_POINTER: 502 impl->offsets = offsets; 503 } 504 505 // Orientation data 506 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 507 switch (copy_mode) { 508 case CEED_COPY_VALUES: 509 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 510 memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 511 impl->orients = impl->orients_allocated; 512 break; 513 case CEED_OWN_POINTER: 514 impl->orients_allocated = (bool *)orients; 515 impl->orients = impl->orients_allocated; 516 break; 517 case CEED_USE_POINTER: 518 impl->orients = orients; 519 } 520 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 521 switch (copy_mode) { 522 case CEED_COPY_VALUES: 523 CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 524 memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 525 impl->curl_orients = impl->curl_orients_allocated; 526 break; 527 case CEED_OWN_POINTER: 528 impl->curl_orients_allocated = (CeedInt *)curl_orients; 529 impl->curl_orients = impl->curl_orients_allocated; 530 break; 531 case CEED_USE_POINTER: 532 impl->curl_orients = curl_orients; 533 } 534 } 535 } 536 537 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 538 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 539 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 540 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 541 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 542 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 543 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 544 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 545 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 546 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 547 548 // Set apply function based upon num_comp, blk_size, and comp_stride 549 CeedInt idx = -1; 550 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 551 switch (idx) { 552 case 110: 553 impl->Apply = CeedElemRestrictionApply_Ref_110; 554 break; 555 case 111: 556 impl->Apply = CeedElemRestrictionApply_Ref_111; 557 break; 558 case 180: 559 impl->Apply = CeedElemRestrictionApply_Ref_180; 560 break; 561 case 181: 562 impl->Apply = CeedElemRestrictionApply_Ref_181; 563 break; 564 case 310: 565 impl->Apply = CeedElemRestrictionApply_Ref_310; 566 break; 567 case 311: 568 impl->Apply = CeedElemRestrictionApply_Ref_311; 569 break; 570 case 380: 571 impl->Apply = CeedElemRestrictionApply_Ref_380; 572 break; 573 case 381: 574 impl->Apply = CeedElemRestrictionApply_Ref_381; 575 break; 576 // LCOV_EXCL_START 577 case 510: 578 impl->Apply = CeedElemRestrictionApply_Ref_510; 579 break; 580 // LCOV_EXCL_STOP 581 case 511: 582 impl->Apply = CeedElemRestrictionApply_Ref_511; 583 break; 584 // LCOV_EXCL_START 585 case 580: 586 impl->Apply = CeedElemRestrictionApply_Ref_580; 587 break; 588 // LCOV_EXCL_STOP 589 case 581: 590 impl->Apply = CeedElemRestrictionApply_Ref_581; 591 break; 592 default: 593 impl->Apply = CeedElemRestrictionApply_Ref_Core; 594 break; 595 } 596 597 return CEED_ERROR_SUCCESS; 598 } 599 600 //------------------------------------------------------------------------------ 601