1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 20 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 21 CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 22 // No offsets provided, identity restriction 23 bool has_backend_strides; 24 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 25 if (has_backend_strides) { 26 // CPU backend strides are {1, elem_size, elem_size*num_comp} 27 // This if branch is left separate to allow better inlining 28 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 29 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 30 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 31 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 32 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 33 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 34 } 35 } 36 } 37 } 38 } else { 39 // User provided strides 40 CeedInt strides[3]; 41 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 42 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 43 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 44 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 45 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 46 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 47 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 48 } 49 } 50 } 51 } 52 } 53 return CEED_ERROR_SUCCESS; 54 } 55 56 static inline int CeedElemRestrictionApplyDefaultNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 57 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 58 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 59 // Default restriction with offsets 60 CeedElemRestriction_Ref *impl; 61 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 62 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 63 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 64 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 65 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 66 } 67 } 68 } 69 return CEED_ERROR_SUCCESS; 70 } 71 72 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 73 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 74 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 75 // Restriction with orientations 76 CeedElemRestriction_Ref *impl; 77 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 78 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 79 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 80 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 81 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = 82 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 83 } 84 } 85 } 86 return CEED_ERROR_SUCCESS; 87 } 88 89 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 90 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 91 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, 92 CeedScalar *vv) { 93 // Restriction with tridiagonal transformation 94 CeedElemRestriction_Ref *impl; 95 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 96 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 97 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 98 CeedInt n = 0; 99 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 100 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 101 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 102 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 103 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 104 impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 105 } 106 for (n = 1; n < elem_size - 1; n++) { 107 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 108 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 109 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 110 impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 111 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 112 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 113 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 114 impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 115 } 116 } 117 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 118 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 119 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 120 impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 121 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 122 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 123 } 124 } 125 } 126 return CEED_ERROR_SUCCESS; 127 } 128 129 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, 130 const CeedInt blk_size, const CeedInt comp_stride, CeedInt start, 131 CeedInt stop, CeedInt num_elem, CeedInt elem_size, 132 CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 133 // Restriction with (unsigned) tridiagonal transformation 134 CeedElemRestriction_Ref *impl; 135 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 136 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 137 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 138 CeedInt n = 0; 139 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 140 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 141 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 142 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 143 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 144 abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 145 } 146 for (n = 1; n < elem_size - 1; n++) { 147 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 148 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 149 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 150 abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 151 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 152 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 153 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 154 abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 155 } 156 } 157 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 158 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 159 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 160 abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 161 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 162 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 163 } 164 } 165 } 166 return CEED_ERROR_SUCCESS; 167 } 168 169 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 170 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 171 CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 172 // No offsets provided, identity restriction 173 bool has_backend_strides; 174 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 175 if (has_backend_strides) { 176 // CPU backend strides are {1, elem_size, elem_size*num_comp} 177 // This if brach is left separate to allow better inlining 178 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 179 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 180 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 181 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 182 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 183 } 184 } 185 } 186 } 187 } else { 188 // User provided strides 189 CeedInt strides[3]; 190 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 191 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 192 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 193 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 194 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 195 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 196 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 197 } 198 } 199 } 200 } 201 } 202 return CEED_ERROR_SUCCESS; 203 } 204 205 static inline int CeedElemRestrictionApplyDefaultTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 206 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 207 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 208 // Default restriction with offsets 209 CeedElemRestriction_Ref *impl; 210 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 211 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 212 for (CeedInt k = 0; k < num_comp; k++) { 213 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 214 // Iteration bound set to discard padding elements 215 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 216 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 217 } 218 } 219 } 220 } 221 return CEED_ERROR_SUCCESS; 222 } 223 224 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 225 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 226 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 227 // Restriction with orientations 228 CeedElemRestriction_Ref *impl; 229 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 230 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 231 for (CeedInt k = 0; k < num_comp; k++) { 232 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 233 // Iteration bound set to discard padding elements 234 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 235 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 236 uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 237 } 238 } 239 } 240 } 241 return CEED_ERROR_SUCCESS; 242 } 243 244 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 245 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 246 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 247 // Restriction with tridiagonal transformation 248 CeedElemRestriction_Ref *impl; 249 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 250 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 251 for (CeedInt k = 0; k < num_comp; k++) { 252 // Iteration bound set to discard padding elements 253 CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 254 for (CeedInt j = 0; j < blk_end; j++) { 255 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 256 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 257 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 258 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 259 impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 260 } 261 for (n = 1; n < elem_size - 1; n++) { 262 for (CeedInt j = 0; j < blk_end; j++) { 263 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 264 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 265 impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 266 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 267 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 268 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 269 impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 270 } 271 } 272 for (CeedInt j = 0; j < blk_end; j++) { 273 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 274 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 275 impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 276 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 277 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 278 } 279 } 280 } 281 return CEED_ERROR_SUCCESS; 282 } 283 284 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, 285 const CeedInt blk_size, const CeedInt comp_stride, CeedInt start, 286 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 287 const CeedScalar *uu, CeedScalar *vv) { 288 // Restriction with (unsigned) tridiagonal transformation 289 CeedElemRestriction_Ref *impl; 290 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 291 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 292 for (CeedInt k = 0; k < num_comp; k++) { 293 // Iteration bound set to discard padding elements 294 CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 295 for (CeedInt j = 0; j < blk_end; j++) { 296 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 297 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 298 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 299 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 300 abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 301 } 302 for (n = 1; n < elem_size - 1; n++) { 303 for (CeedInt j = 0; j < blk_end; j++) { 304 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 305 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 306 abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 307 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 308 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 309 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 310 abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 311 } 312 } 313 for (CeedInt j = 0; j < blk_end; j++) { 314 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 315 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 316 abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 317 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 318 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 319 } 320 } 321 } 322 return CEED_ERROR_SUCCESS; 323 } 324 325 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 326 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 327 CeedVector u, CeedVector v, CeedRequest *request) { 328 const CeedScalar *uu; 329 CeedScalar *vv; 330 CeedInt num_elem, elem_size, v_offset; 331 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 332 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 333 v_offset = start * blk_size * elem_size * num_comp; 334 CeedRestrictionType rstr_type; 335 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 336 337 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 338 if (t_mode == CEED_TRANSPOSE) { 339 // Sum into for transpose mode, E-vector to L-vector 340 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 341 } else { 342 // Overwrite for notranspose mode, L-vector to E-vector 343 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 344 } 345 if (t_mode == CEED_TRANSPOSE) { 346 // Restriction from E-vector to L-vector 347 // Performing v += r^T * u 348 // uu has shape [elem_size, num_comp, num_elem], row-major 349 // vv has shape [nnodes, num_comp] 350 // Sum into for transpose mode 351 switch (rstr_type) { 352 case CEED_RESTRICTION_STRIDED: 353 CeedElemRestrictionApplyStridedTranspose_Ref_Core(r, num_comp, blk_size, start, stop, num_elem, elem_size, v_offset, uu, vv); 354 break; 355 case CEED_RESTRICTION_DEFAULT: 356 CeedElemRestrictionApplyDefaultTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 357 break; 358 case CEED_RESTRICTION_ORIENTED: 359 if (use_signs) { 360 CeedElemRestrictionApplyOrientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 361 } else { 362 CeedElemRestrictionApplyDefaultTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 363 } 364 break; 365 case CEED_RESTRICTION_CURL_ORIENTED: 366 if (use_signs && use_orients) { 367 CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 368 vv); 369 } else if (use_orients) { 370 CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, 371 v_offset, uu, vv); 372 } else { 373 CeedElemRestrictionApplyDefaultTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 374 } 375 break; 376 } 377 } else { 378 // Restriction from L-vector to E-vector 379 // Perform: v = r * u 380 // vv has shape [elem_size, num_comp, num_elem], row-major 381 // uu has shape [nnodes, num_comp] 382 // Overwrite for notranspose mode 383 switch (rstr_type) { 384 case CEED_RESTRICTION_STRIDED: 385 CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(r, num_comp, blk_size, start, stop, num_elem, elem_size, v_offset, uu, vv); 386 break; 387 case CEED_RESTRICTION_DEFAULT: 388 CeedElemRestrictionApplyDefaultNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 389 break; 390 case CEED_RESTRICTION_ORIENTED: 391 if (use_signs) { 392 CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 393 vv); 394 } else { 395 CeedElemRestrictionApplyDefaultNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 396 } 397 break; 398 case CEED_RESTRICTION_CURL_ORIENTED: 399 if (use_signs && use_orients) { 400 CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 401 vv); 402 } else if (use_orients) { 403 CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, 404 v_offset, uu, vv); 405 } else { 406 CeedElemRestrictionApplyDefaultNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 407 } 408 break; 409 } 410 } 411 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 412 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 413 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 414 return CEED_ERROR_SUCCESS; 415 } 416 417 //------------------------------------------------------------------------------ 418 // ElemRestriction Apply - Common Sizes 419 //------------------------------------------------------------------------------ 420 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 421 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 422 CeedVector v, CeedRequest *request) { 423 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 424 } 425 426 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 427 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 428 CeedVector v, CeedRequest *request) { 429 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 430 } 431 432 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 433 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 434 CeedVector v, CeedRequest *request) { 435 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 436 } 437 438 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 439 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 440 CeedVector v, CeedRequest *request) { 441 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 442 } 443 444 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 445 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 446 CeedVector v, CeedRequest *request) { 447 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 448 } 449 450 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 451 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 452 CeedVector v, CeedRequest *request) { 453 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 454 } 455 456 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 457 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 458 CeedVector v, CeedRequest *request) { 459 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 460 } 461 462 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 463 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 464 CeedVector v, CeedRequest *request) { 465 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 466 } 467 468 // LCOV_EXCL_START 469 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 470 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 471 CeedVector v, CeedRequest *request) { 472 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 473 } 474 // LCOV_EXCL_STOP 475 476 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 477 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 478 CeedVector v, CeedRequest *request) { 479 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 480 } 481 482 // LCOV_EXCL_START 483 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 484 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 485 CeedVector v, CeedRequest *request) { 486 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 487 } 488 // LCOV_EXCL_STOP 489 490 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 491 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 492 CeedVector v, CeedRequest *request) { 493 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 494 } 495 496 //------------------------------------------------------------------------------ 497 // ElemRestriction Apply 498 //------------------------------------------------------------------------------ 499 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 500 CeedInt num_blk, blk_size, num_comp, comp_stride; 501 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 502 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 503 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 504 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 505 CeedElemRestriction_Ref *impl; 506 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 507 508 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, true, true, u, v, request); 509 } 510 511 //------------------------------------------------------------------------------ 512 // ElemRestriction Apply Unsigned 513 //------------------------------------------------------------------------------ 514 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 515 CeedInt num_blk, blk_size, num_comp, comp_stride; 516 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 517 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 518 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 519 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 520 CeedElemRestriction_Ref *impl; 521 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 522 523 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, true, u, v, request); 524 } 525 526 //------------------------------------------------------------------------------ 527 // ElemRestriction Apply Unoriented 528 //------------------------------------------------------------------------------ 529 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 530 CeedInt num_blk, blk_size, num_comp, comp_stride; 531 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 532 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 533 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 534 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 535 CeedElemRestriction_Ref *impl; 536 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 537 538 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, false, u, v, request); 539 } 540 541 //------------------------------------------------------------------------------ 542 // ElemRestriction Apply Block 543 //------------------------------------------------------------------------------ 544 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 545 CeedRequest *request) { 546 CeedInt blk_size, num_comp, comp_stride; 547 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 548 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 549 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 550 CeedElemRestriction_Ref *impl; 551 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 552 553 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request); 554 } 555 556 //------------------------------------------------------------------------------ 557 // ElemRestriction Get Offsets 558 //------------------------------------------------------------------------------ 559 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 560 CeedElemRestriction_Ref *impl; 561 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 562 Ceed ceed; 563 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 564 565 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 566 567 *offsets = impl->offsets; 568 return CEED_ERROR_SUCCESS; 569 } 570 571 //------------------------------------------------------------------------------ 572 // ElemRestriction Get Orientations 573 //------------------------------------------------------------------------------ 574 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 575 CeedElemRestriction_Ref *impl; 576 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 577 Ceed ceed; 578 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 579 580 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 581 582 *orients = impl->orients; 583 return CEED_ERROR_SUCCESS; 584 } 585 586 //------------------------------------------------------------------------------ 587 // ElemRestriction Get Curl-Conforming Orientations 588 //------------------------------------------------------------------------------ 589 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 590 CeedElemRestriction_Ref *impl; 591 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 592 Ceed ceed; 593 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 594 595 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 596 597 *curl_orients = impl->curl_orients; 598 return CEED_ERROR_SUCCESS; 599 } 600 601 //------------------------------------------------------------------------------ 602 // ElemRestriction Destroy 603 //------------------------------------------------------------------------------ 604 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 605 CeedElemRestriction_Ref *impl; 606 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 607 608 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 609 CeedCallBackend(CeedFree(&impl->orients_allocated)); 610 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 611 CeedCallBackend(CeedFree(&impl)); 612 return CEED_ERROR_SUCCESS; 613 } 614 615 //------------------------------------------------------------------------------ 616 // ElemRestriction Create 617 //------------------------------------------------------------------------------ 618 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 619 const CeedInt8 *curl_orients, CeedElemRestriction r) { 620 CeedElemRestriction_Ref *impl; 621 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 622 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 623 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 624 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 625 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 626 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 627 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 628 Ceed ceed; 629 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 630 631 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 632 CeedCallBackend(CeedCalloc(1, &impl)); 633 634 // Offsets data 635 CeedRestrictionType rstr_type; 636 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 637 if (rstr_type != CEED_RESTRICTION_STRIDED) { 638 // Check indices for ref or memcheck backends 639 Ceed parent_ceed = ceed, curr_ceed = NULL; 640 while (parent_ceed != curr_ceed) { 641 curr_ceed = parent_ceed; 642 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 643 } 644 const char *resource; 645 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 646 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 647 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 648 CeedSize l_size; 649 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 650 651 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 652 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 653 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 654 } 655 } 656 657 // Copy data 658 switch (copy_mode) { 659 case CEED_COPY_VALUES: 660 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 661 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 662 impl->offsets = impl->offsets_allocated; 663 break; 664 case CEED_OWN_POINTER: 665 impl->offsets_allocated = (CeedInt *)offsets; 666 impl->offsets = impl->offsets_allocated; 667 break; 668 case CEED_USE_POINTER: 669 impl->offsets = offsets; 670 } 671 672 // Orientation data 673 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 674 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 675 switch (copy_mode) { 676 case CEED_COPY_VALUES: 677 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 678 memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 679 impl->orients = impl->orients_allocated; 680 break; 681 case CEED_OWN_POINTER: 682 impl->orients_allocated = (bool *)orients; 683 impl->orients = impl->orients_allocated; 684 break; 685 case CEED_USE_POINTER: 686 impl->orients = orients; 687 } 688 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 689 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 690 switch (copy_mode) { 691 case CEED_COPY_VALUES: 692 CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 693 memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 694 impl->curl_orients = impl->curl_orients_allocated; 695 break; 696 case CEED_OWN_POINTER: 697 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 698 impl->curl_orients = impl->curl_orients_allocated; 699 break; 700 case CEED_USE_POINTER: 701 impl->curl_orients = curl_orients; 702 } 703 } 704 } 705 706 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 707 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 708 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 709 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 710 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 711 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 712 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 713 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 714 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 715 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 716 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 717 718 // Set apply function based upon num_comp, blk_size, and comp_stride 719 CeedInt idx = -1; 720 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 721 switch (idx) { 722 case 110: 723 impl->Apply = CeedElemRestrictionApply_Ref_110; 724 break; 725 case 111: 726 impl->Apply = CeedElemRestrictionApply_Ref_111; 727 break; 728 case 180: 729 impl->Apply = CeedElemRestrictionApply_Ref_180; 730 break; 731 case 181: 732 impl->Apply = CeedElemRestrictionApply_Ref_181; 733 break; 734 case 310: 735 impl->Apply = CeedElemRestrictionApply_Ref_310; 736 break; 737 case 311: 738 impl->Apply = CeedElemRestrictionApply_Ref_311; 739 break; 740 case 380: 741 impl->Apply = CeedElemRestrictionApply_Ref_380; 742 break; 743 case 381: 744 impl->Apply = CeedElemRestrictionApply_Ref_381; 745 break; 746 // LCOV_EXCL_START 747 case 510: 748 impl->Apply = CeedElemRestrictionApply_Ref_510; 749 break; 750 // LCOV_EXCL_STOP 751 case 511: 752 impl->Apply = CeedElemRestrictionApply_Ref_511; 753 break; 754 // LCOV_EXCL_START 755 case 580: 756 impl->Apply = CeedElemRestrictionApply_Ref_580; 757 break; 758 // LCOV_EXCL_STOP 759 case 581: 760 impl->Apply = CeedElemRestrictionApply_Ref_581; 761 break; 762 default: 763 impl->Apply = CeedElemRestrictionApply_Ref_Core; 764 break; 765 } 766 767 return CEED_ERROR_SUCCESS; 768 } 769 770 //------------------------------------------------------------------------------ 771