1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-ref.h" 15 16 //------------------------------------------------------------------------------ 17 // Core ElemRestriction Apply Code 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 20 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 21 CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 22 // No offsets provided, identity restriction 23 bool has_backend_strides; 24 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 25 if (has_backend_strides) { 26 // CPU backend strides are {1, elem_size, elem_size*num_comp} 27 // This if branch is left separate to allow better inlining 28 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 29 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 30 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 31 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 32 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 33 uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 34 } 35 } 36 } 37 } 38 } else { 39 // User provided strides 40 CeedInt strides[3]; 41 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 42 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 43 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 44 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 45 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 46 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 47 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 48 } 49 } 50 } 51 } 52 } 53 return CEED_ERROR_SUCCESS; 54 } 55 56 static inline int CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 57 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 58 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 59 // Default restriction with offsets 60 CeedElemRestriction_Ref *impl; 61 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 62 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 63 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 64 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 65 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 66 } 67 } 68 } 69 return CEED_ERROR_SUCCESS; 70 } 71 72 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 73 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 74 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 75 // Restriction with orientations 76 CeedElemRestriction_Ref *impl; 77 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 78 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 79 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 80 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 81 vv[elem_size * (k * blk_size + e * num_comp) + i - v_offset] = 82 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 83 } 84 } 85 } 86 return CEED_ERROR_SUCCESS; 87 } 88 89 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 90 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 91 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, 92 CeedScalar *vv) { 93 // Restriction with tridiagonal transformation 94 CeedElemRestriction_Ref *impl; 95 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 96 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 97 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 98 CeedInt n = 0; 99 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 100 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 101 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 102 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 103 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 104 impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 105 } 106 for (n = 1; n < elem_size - 1; n++) { 107 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 108 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 109 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 110 impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 111 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 112 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 113 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 114 impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]; 115 } 116 } 117 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 118 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 119 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 120 impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size] + 121 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 122 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 123 } 124 } 125 } 126 return CEED_ERROR_SUCCESS; 127 } 128 129 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, 130 const CeedInt blk_size, const CeedInt comp_stride, CeedInt start, 131 CeedInt stop, CeedInt num_elem, CeedInt elem_size, 132 CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 133 // Restriction with (unsigned) tridiagonal transformation 134 CeedElemRestriction_Ref *impl; 135 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 136 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 137 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 138 CeedInt n = 0; 139 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 140 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 141 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 142 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 143 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 144 abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 145 } 146 for (n = 1; n < elem_size - 1; n++) { 147 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 148 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 149 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 150 abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 151 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 152 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 153 uu[impl->offsets[j + (n + 1) * blk_size + e * elem_size] + k * comp_stride] * 154 abs(impl->curl_orients[j + (3 * n + 2) * blk_size + e * 3 * elem_size]); 155 } 156 } 157 CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 158 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 159 uu[impl->offsets[j + (n - 1) * blk_size + e * elem_size] + k * comp_stride] * 160 abs(impl->curl_orients[j + (3 * n + 0) * blk_size + e * 3 * elem_size]) + 161 uu[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] * 162 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 163 } 164 } 165 } 166 return CEED_ERROR_SUCCESS; 167 } 168 169 static inline int CeedElemRestrictionApplyStridedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 170 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 171 CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 172 // No offsets provided, identity restriction 173 bool has_backend_strides; 174 CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 175 if (has_backend_strides) { 176 // CPU backend strides are {1, elem_size, elem_size*num_comp} 177 // This if brach is left separate to allow better inlining 178 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 179 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 180 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 181 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 182 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 183 } 184 } 185 } 186 } 187 } else { 188 // User provided strides 189 CeedInt strides[3]; 190 CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 191 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 192 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 193 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 194 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 195 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 196 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 197 } 198 } 199 } 200 } 201 } 202 return CEED_ERROR_SUCCESS; 203 } 204 205 static inline int CeedElemRestrictionApplyStandardTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 206 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 207 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 208 // Default restriction with offsets 209 CeedElemRestriction_Ref *impl; 210 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 211 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 212 for (CeedInt k = 0; k < num_comp; k++) { 213 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 214 // Iteration bound set to discard padding elements 215 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 216 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset]; 217 } 218 } 219 } 220 } 221 return CEED_ERROR_SUCCESS; 222 } 223 224 static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 225 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 226 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 227 // Restriction with orientations 228 CeedElemRestriction_Ref *impl; 229 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 230 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 231 for (CeedInt k = 0; k < num_comp; k++) { 232 for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 233 // Iteration bound set to discard padding elements 234 for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 235 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 236 uu[elem_size * (k * blk_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 237 } 238 } 239 } 240 } 241 return CEED_ERROR_SUCCESS; 242 } 243 244 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, 245 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 246 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 247 // Restriction with tridiagonal transformation 248 CeedElemRestriction_Ref *impl; 249 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 250 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 251 for (CeedInt k = 0; k < num_comp; k++) { 252 // Iteration bound set to discard padding elements 253 CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 254 for (CeedInt j = 0; j < blk_end; j++) { 255 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 256 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 257 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 258 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 259 impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 260 } 261 for (n = 1; n < elem_size - 1; n++) { 262 for (CeedInt j = 0; j < blk_end; j++) { 263 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 264 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 265 impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 266 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 267 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size] + 268 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 269 impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]; 270 } 271 } 272 for (CeedInt j = 0; j < blk_end; j++) { 273 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 274 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 275 impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size] + 276 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 277 impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]; 278 } 279 } 280 } 281 return CEED_ERROR_SUCCESS; 282 } 283 284 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, 285 const CeedInt blk_size, const CeedInt comp_stride, CeedInt start, 286 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 287 const CeedScalar *uu, CeedScalar *vv) { 288 // Restriction with (unsigned) tridiagonal transformation 289 CeedElemRestriction_Ref *impl; 290 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 291 for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 292 for (CeedInt k = 0; k < num_comp; k++) { 293 // Iteration bound set to discard padding elements 294 CeedInt blk_end = CeedIntMin(blk_size, num_elem - e), n = 0; 295 for (CeedInt j = 0; j < blk_end; j++) { 296 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 297 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 298 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 299 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 300 abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 301 } 302 for (n = 1; n < elem_size - 1; n++) { 303 for (CeedInt j = 0; j < blk_end; j++) { 304 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 305 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 306 abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 307 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 308 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]) + 309 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * blk_size + j - v_offset] * 310 abs(impl->curl_orients[j + (3 * n + 3) * blk_size + e * 3 * elem_size]); 311 } 312 } 313 for (CeedInt j = 0; j < blk_end; j++) { 314 vv[impl->offsets[j + n * blk_size + e * elem_size] + k * comp_stride] += 315 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * blk_size + j - v_offset] * 316 abs(impl->curl_orients[j + (3 * n - 1) * blk_size + e * 3 * elem_size]) + 317 uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] * 318 abs(impl->curl_orients[j + (3 * n + 1) * blk_size + e * 3 * elem_size]); 319 } 320 } 321 } 322 return CEED_ERROR_SUCCESS; 323 } 324 325 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 326 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, 327 CeedVector u, CeedVector v, CeedRequest *request) { 328 const CeedScalar *uu; 329 CeedScalar *vv; 330 CeedInt num_elem, elem_size, v_offset; 331 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 332 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 333 v_offset = start * blk_size * elem_size * num_comp; 334 CeedRestrictionType rstr_type; 335 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 336 337 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 338 if (t_mode == CEED_TRANSPOSE) { 339 // Sum into for transpose mode, E-vector to L-vector 340 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 341 } else { 342 // Overwrite for notranspose mode, L-vector to E-vector 343 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 344 } 345 if (t_mode == CEED_TRANSPOSE) { 346 // Restriction from E-vector to L-vector 347 // Performing v += r^T * u 348 // uu has shape [elem_size, num_comp, num_elem], row-major 349 // vv has shape [nnodes, num_comp] 350 // Sum into for transpose mode 351 switch (rstr_type) { 352 case CEED_RESTRICTION_STRIDED: 353 CeedElemRestrictionApplyStridedTranspose_Ref_Core(r, num_comp, blk_size, start, stop, num_elem, elem_size, v_offset, uu, vv); 354 break; 355 case CEED_RESTRICTION_STANDARD: 356 CeedElemRestrictionApplyStandardTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 357 break; 358 case CEED_RESTRICTION_ORIENTED: 359 if (use_signs) { 360 CeedElemRestrictionApplyOrientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 361 } else { 362 CeedElemRestrictionApplyStandardTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 363 } 364 break; 365 case CEED_RESTRICTION_CURL_ORIENTED: 366 if (use_signs && use_orients) { 367 CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 368 vv); 369 } else if (use_orients) { 370 CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, 371 v_offset, uu, vv); 372 } else { 373 CeedElemRestrictionApplyStandardTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 374 } 375 break; 376 } 377 } else { 378 // Restriction from L-vector to E-vector 379 // Perform: v = r * u 380 // vv has shape [elem_size, num_comp, num_elem], row-major 381 // uu has shape [nnodes, num_comp] 382 // Overwrite for notranspose mode 383 switch (rstr_type) { 384 case CEED_RESTRICTION_STRIDED: 385 CeedElemRestrictionApplyStridedNoTranspose_Ref_Core(r, num_comp, blk_size, start, stop, num_elem, elem_size, v_offset, uu, vv); 386 break; 387 case CEED_RESTRICTION_STANDARD: 388 CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, vv); 389 break; 390 case CEED_RESTRICTION_ORIENTED: 391 if (use_signs) { 392 CeedElemRestrictionApplyOrientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 393 vv); 394 } else { 395 CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 396 vv); 397 } 398 break; 399 case CEED_RESTRICTION_CURL_ORIENTED: 400 if (use_signs && use_orients) { 401 CeedElemRestrictionApplyCurlOrientedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 402 vv); 403 } else if (use_orients) { 404 CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, 405 v_offset, uu, vv); 406 } else { 407 CeedElemRestrictionApplyStandardNoTranspose_Ref_Core(r, num_comp, blk_size, comp_stride, start, stop, num_elem, elem_size, v_offset, uu, 408 vv); 409 } 410 break; 411 } 412 } 413 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 414 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 415 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 416 return CEED_ERROR_SUCCESS; 417 } 418 419 //------------------------------------------------------------------------------ 420 // ElemRestriction Apply - Common Sizes 421 //------------------------------------------------------------------------------ 422 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 423 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 424 CeedVector v, CeedRequest *request) { 425 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 426 } 427 428 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 429 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 430 CeedVector v, CeedRequest *request) { 431 return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 432 } 433 434 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 435 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 436 CeedVector v, CeedRequest *request) { 437 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 438 } 439 440 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 441 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 442 CeedVector v, CeedRequest *request) { 443 return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 444 } 445 446 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 447 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 448 CeedVector v, CeedRequest *request) { 449 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 450 } 451 452 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 453 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 454 CeedVector v, CeedRequest *request) { 455 return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 456 } 457 458 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 459 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 460 CeedVector v, CeedRequest *request) { 461 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 462 } 463 464 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 465 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 466 CeedVector v, CeedRequest *request) { 467 return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 468 } 469 470 // LCOV_EXCL_START 471 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 472 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 473 CeedVector v, CeedRequest *request) { 474 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 475 } 476 // LCOV_EXCL_STOP 477 478 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 479 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 480 CeedVector v, CeedRequest *request) { 481 return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 482 } 483 484 // LCOV_EXCL_START 485 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 486 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 487 CeedVector v, CeedRequest *request) { 488 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, use_signs, use_orients, u, v, request); 489 } 490 // LCOV_EXCL_STOP 491 492 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 493 CeedInt start, CeedInt stop, CeedTransposeMode t_mode, bool use_signs, bool use_orients, CeedVector u, 494 CeedVector v, CeedRequest *request) { 495 return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, use_signs, use_orients, u, v, request); 496 } 497 498 //------------------------------------------------------------------------------ 499 // ElemRestriction Apply 500 //------------------------------------------------------------------------------ 501 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 502 CeedInt num_blk, blk_size, num_comp, comp_stride; 503 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 504 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 505 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 506 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 507 CeedElemRestriction_Ref *impl; 508 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 509 510 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, true, true, u, v, request); 511 } 512 513 //------------------------------------------------------------------------------ 514 // ElemRestriction Apply Unsigned 515 //------------------------------------------------------------------------------ 516 static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 517 CeedInt num_blk, blk_size, num_comp, comp_stride; 518 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 519 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 520 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 521 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 522 CeedElemRestriction_Ref *impl; 523 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 524 525 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, true, u, v, request); 526 } 527 528 //------------------------------------------------------------------------------ 529 // ElemRestriction Apply Unoriented 530 //------------------------------------------------------------------------------ 531 static int CeedElemRestrictionApplyUnoriented_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 532 CeedInt num_blk, blk_size, num_comp, comp_stride; 533 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 534 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 535 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 536 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 537 CeedElemRestriction_Ref *impl; 538 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 539 540 return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, false, false, u, v, request); 541 } 542 543 //------------------------------------------------------------------------------ 544 // ElemRestriction Apply Block 545 //------------------------------------------------------------------------------ 546 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 547 CeedRequest *request) { 548 CeedInt blk_size, num_comp, comp_stride; 549 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 550 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 551 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 552 CeedElemRestriction_Ref *impl; 553 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 554 555 return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request); 556 } 557 558 //------------------------------------------------------------------------------ 559 // ElemRestriction Get Offsets 560 //------------------------------------------------------------------------------ 561 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 562 CeedElemRestriction_Ref *impl; 563 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 564 Ceed ceed; 565 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 566 567 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 568 569 *offsets = impl->offsets; 570 return CEED_ERROR_SUCCESS; 571 } 572 573 //------------------------------------------------------------------------------ 574 // ElemRestriction Get Orientations 575 //------------------------------------------------------------------------------ 576 static int CeedElemRestrictionGetOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 577 CeedElemRestriction_Ref *impl; 578 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 579 Ceed ceed; 580 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 581 582 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 583 584 *orients = impl->orients; 585 return CEED_ERROR_SUCCESS; 586 } 587 588 //------------------------------------------------------------------------------ 589 // ElemRestriction Get Curl-Conforming Orientations 590 //------------------------------------------------------------------------------ 591 static int CeedElemRestrictionGetCurlOrientations_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 592 CeedElemRestriction_Ref *impl; 593 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 594 Ceed ceed; 595 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 596 597 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 598 599 *curl_orients = impl->curl_orients; 600 return CEED_ERROR_SUCCESS; 601 } 602 603 //------------------------------------------------------------------------------ 604 // ElemRestriction Destroy 605 //------------------------------------------------------------------------------ 606 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 607 CeedElemRestriction_Ref *impl; 608 CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 609 610 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 611 CeedCallBackend(CeedFree(&impl->orients_allocated)); 612 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 613 CeedCallBackend(CeedFree(&impl)); 614 return CEED_ERROR_SUCCESS; 615 } 616 617 //------------------------------------------------------------------------------ 618 // ElemRestriction Create 619 //------------------------------------------------------------------------------ 620 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 621 const CeedInt8 *curl_orients, CeedElemRestriction r) { 622 CeedElemRestriction_Ref *impl; 623 CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 624 CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 625 CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 626 CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 627 CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 628 CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 629 CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 630 Ceed ceed; 631 CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 632 633 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 634 CeedCallBackend(CeedCalloc(1, &impl)); 635 636 // Offsets data 637 CeedRestrictionType rstr_type; 638 CeedCallBackend(CeedElemRestrictionGetType(r, &rstr_type)); 639 if (rstr_type != CEED_RESTRICTION_STRIDED) { 640 // Check indices for ref or memcheck backends 641 Ceed parent_ceed = ceed, curr_ceed = NULL; 642 while (parent_ceed != curr_ceed) { 643 curr_ceed = parent_ceed; 644 CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 645 } 646 const char *resource; 647 CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 648 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 649 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 650 CeedSize l_size; 651 CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 652 653 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 654 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 655 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 656 } 657 } 658 659 // Copy data 660 switch (copy_mode) { 661 case CEED_COPY_VALUES: 662 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 663 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 664 impl->offsets = impl->offsets_allocated; 665 break; 666 case CEED_OWN_POINTER: 667 impl->offsets_allocated = (CeedInt *)offsets; 668 impl->offsets = impl->offsets_allocated; 669 break; 670 case CEED_USE_POINTER: 671 impl->offsets = offsets; 672 } 673 674 // Orientation data 675 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 676 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 677 switch (copy_mode) { 678 case CEED_COPY_VALUES: 679 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 680 memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); 681 impl->orients = impl->orients_allocated; 682 break; 683 case CEED_OWN_POINTER: 684 impl->orients_allocated = (bool *)orients; 685 impl->orients = impl->orients_allocated; 686 break; 687 case CEED_USE_POINTER: 688 impl->orients = orients; 689 } 690 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 691 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 692 switch (copy_mode) { 693 case CEED_COPY_VALUES: 694 CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 695 memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); 696 impl->curl_orients = impl->curl_orients_allocated; 697 break; 698 case CEED_OWN_POINTER: 699 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 700 impl->curl_orients = impl->curl_orients_allocated; 701 break; 702 case CEED_USE_POINTER: 703 impl->curl_orients = curl_orients; 704 } 705 } 706 } 707 708 CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 709 CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 710 CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 711 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 712 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 713 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Ref)); 714 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 715 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 716 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOrientations", CeedElemRestrictionGetOrientations_Ref)); 717 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Ref)); 718 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 719 720 // Set apply function based upon num_comp, blk_size, and comp_stride 721 CeedInt idx = -1; 722 if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 723 switch (idx) { 724 case 110: 725 impl->Apply = CeedElemRestrictionApply_Ref_110; 726 break; 727 case 111: 728 impl->Apply = CeedElemRestrictionApply_Ref_111; 729 break; 730 case 180: 731 impl->Apply = CeedElemRestrictionApply_Ref_180; 732 break; 733 case 181: 734 impl->Apply = CeedElemRestrictionApply_Ref_181; 735 break; 736 case 310: 737 impl->Apply = CeedElemRestrictionApply_Ref_310; 738 break; 739 case 311: 740 impl->Apply = CeedElemRestrictionApply_Ref_311; 741 break; 742 case 380: 743 impl->Apply = CeedElemRestrictionApply_Ref_380; 744 break; 745 case 381: 746 impl->Apply = CeedElemRestrictionApply_Ref_381; 747 break; 748 // LCOV_EXCL_START 749 case 510: 750 impl->Apply = CeedElemRestrictionApply_Ref_510; 751 break; 752 // LCOV_EXCL_STOP 753 case 511: 754 impl->Apply = CeedElemRestrictionApply_Ref_511; 755 break; 756 // LCOV_EXCL_START 757 case 580: 758 impl->Apply = CeedElemRestrictionApply_Ref_580; 759 break; 760 // LCOV_EXCL_STOP 761 case 581: 762 impl->Apply = CeedElemRestrictionApply_Ref_581; 763 break; 764 default: 765 impl->Apply = CeedElemRestrictionApply_Ref_Core; 766 break; 767 } 768 769 return CEED_ERROR_SUCCESS; 770 } 771 772 //------------------------------------------------------------------------------ 773