| ceed-ref-restriction.c (07d5dec1642b3d8b5aca8f12f47bcc29bb156592) | ceed-ref-restriction.c (58c07c4fa7bdba34c2b29fbdcd58893d48c3fd9e) |
|---|---|
| 1// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include <ceed.h> --- 173 unchanged lines hidden (view full) --- 182 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 183 if (has_backend_strides) { 184 // CPU backend strides are {1, elem_size, elem_size*num_comp} 185 // This if brach is left separate to allow better inlining 186 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 187 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 188 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 189 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { | 1// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include <ceed.h> --- 173 unchanged lines hidden (view full) --- 182 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 183 if (has_backend_strides) { 184 // CPU backend strides are {1, elem_size, elem_size*num_comp} 185 // This if brach is left separate to allow better inlining 186 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 187 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 188 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 189 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { |
| 190 vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; | 190 CeedScalar uu_val; 191 192 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 193 CeedPragmaAtomic vv[n + k * elem_size + (e + j) * elem_size * num_comp] += uu_val; |
| 191 } 192 } 193 } 194 } 195 } else { 196 // User provided strides 197 CeedInt strides[3]; 198 199 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 200 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 201 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 202 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 203 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { | 194 } 195 } 196 } 197 } 198 } else { 199 // User provided strides 200 CeedInt strides[3]; 201 202 CeedCallBackend(CeedElemRestrictionGetStrides(rstr, &strides)); 203 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 204 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 205 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 206 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { |
| 204 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 205 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; | 207 CeedScalar uu_val; 208 209 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset]; 210 CeedPragmaAtomic vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += uu_val; |
| 206 } 207 } 208 } 209 } 210 } 211 return CEED_ERROR_SUCCESS; 212} 213 --- 4 unchanged lines hidden (view full) --- 218 CeedElemRestriction_Ref *impl; 219 220 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 221 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 222 for (CeedInt k = 0; k < num_comp; k++) { 223 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 224 // Iteration bound set to discard padding elements 225 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { | 211 } 212 } 213 } 214 } 215 } 216 return CEED_ERROR_SUCCESS; 217} 218 --- 4 unchanged lines hidden (view full) --- 223 CeedElemRestriction_Ref *impl; 224 225 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 226 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 227 for (CeedInt k = 0; k < num_comp; k++) { 228 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 229 // Iteration bound set to discard padding elements 230 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { |
| 226 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; | 231 CeedScalar uu_val; 232 233 uu_val = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset]; 234 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu_val; |
| 227 } 228 } 229 } 230 } 231 return CEED_ERROR_SUCCESS; 232} 233 234static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 235 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 236 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 237 // Restriction with orientations 238 CeedElemRestriction_Ref *impl; 239 240 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 241 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 242 for (CeedInt k = 0; k < num_comp; k++) { 243 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 244 // Iteration bound set to discard padding elements 245 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { | 235 } 236 } 237 } 238 } 239 return CEED_ERROR_SUCCESS; 240} 241 242static inline int CeedElemRestrictionApplyOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 243 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 244 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 245 // Restriction with orientations 246 CeedElemRestriction_Ref *impl; 247 248 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 249 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 250 for (CeedInt k = 0; k < num_comp; k++) { 251 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 252 // Iteration bound set to discard padding elements 253 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { |
| 246 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 247 uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); | 254 CeedScalar uu_val; 255 256 uu_val = uu[elem_size * (k * block_size + e * num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 257 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu_val; |
| 248 } 249 } 250 } 251 } 252 return CEED_ERROR_SUCCESS; 253} 254 255static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 256 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 257 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 258 // Restriction with tridiagonal transformation 259 CeedElemRestriction_Ref *impl; 260 261 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 262 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 263 for (CeedInt k = 0; k < num_comp; k++) { 264 // Iteration bound set to discard padding elements | 258 } 259 } 260 } 261 } 262 return CEED_ERROR_SUCCESS; 263} 264 265static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 266 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 267 CeedInt elem_size, CeedInt v_offset, const CeedScalar *uu, CeedScalar *vv) { 268 // Restriction with tridiagonal transformation 269 CeedElemRestriction_Ref *impl; 270 271 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 272 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 273 for (CeedInt k = 0; k < num_comp; k++) { 274 // Iteration bound set to discard padding elements |
| 265 CeedInt block_end = CeedIntMin(block_size, num_elem - e), n = 0; | 275 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 276 CeedInt n = 0; 277 |
| 266 for (CeedInt j = 0; j < block_end; j++) { | 278 for (CeedInt j = 0; j < block_end; j++) { |
| 267 vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += 268 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 269 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 270 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 271 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; | 279 CeedScalar uu_val; 280 281 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 282 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 283 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 284 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 285 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; |
| 272 } 273 for (n = 1; n < elem_size - 1; n++) { 274 for (CeedInt j = 0; j < block_end; j++) { | 286 } 287 for (n = 1; n < elem_size - 1; n++) { 288 for (CeedInt j = 0; j < block_end; j++) { |
| 275 vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += 276 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 277 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 278 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 279 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 280 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 281 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; | 289 CeedScalar uu_val; 290 291 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 292 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 293 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 294 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 295 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 296 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 297 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; |
| 282 } 283 } 284 for (CeedInt j = 0; j < block_end; j++) { | 298 } 299 } 300 for (CeedInt j = 0; j < block_end; j++) { |
| 285 vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += 286 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 287 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 288 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 289 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; | 301 CeedScalar uu_val; 302 303 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 304 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 305 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 306 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 307 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; |
| 290 } 291 } 292 } 293 return CEED_ERROR_SUCCESS; 294} 295 296static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 297 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 298 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 299 const CeedScalar *uu, CeedScalar *vv) { 300 // Restriction with (unsigned) tridiagonal transformation 301 CeedElemRestriction_Ref *impl; 302 303 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 304 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 305 for (CeedInt k = 0; k < num_comp; k++) { 306 // Iteration bound set to discard padding elements | 308 } 309 } 310 } 311 return CEED_ERROR_SUCCESS; 312} 313 314static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, 315 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 316 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedInt v_offset, 317 const CeedScalar *uu, CeedScalar *vv) { 318 // Restriction with (unsigned) tridiagonal transformation 319 CeedElemRestriction_Ref *impl; 320 321 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 322 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 323 for (CeedInt k = 0; k < num_comp; k++) { 324 // Iteration bound set to discard padding elements |
| 307 CeedInt n = 0; | |
| 308 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); | 325 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); |
| 326 CeedInt n = 0; |
|
| 309 310 for (CeedInt j = 0; j < block_end; j++) { | 327 328 for (CeedInt j = 0; j < block_end; j++) { |
| 311 vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += 312 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 313 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 314 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 315 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); | 329 CeedScalar uu_val; 330 331 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 332 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 333 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 334 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 335 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; |
| 316 } 317 for (n = 1; n < elem_size - 1; n++) { 318 for (CeedInt j = 0; j < block_end; j++) { | 336 } 337 for (n = 1; n < elem_size - 1; n++) { 338 for (CeedInt j = 0; j < block_end; j++) { |
| 319 vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += 320 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 321 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 322 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 323 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 324 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 325 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); | 339 CeedScalar uu_val; 340 341 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 342 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 343 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 344 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 345 uu[e * elem_size * num_comp + (k * elem_size + n + 1) * block_size + j - v_offset] * 346 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 347 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; |
| 326 } 327 } 328 for (CeedInt j = 0; j < block_end; j++) { | 348 } 349 } 350 for (CeedInt j = 0; j < block_end; j++) { |
| 329 vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += 330 uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 331 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 332 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 333 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); | 351 CeedScalar uu_val; 352 353 uu_val = uu[e * elem_size * num_comp + (k * elem_size + n - 1) * block_size + j - v_offset] * 354 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 355 uu[e * elem_size * num_comp + (k * elem_size + n) * block_size + j - v_offset] * 356 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 357 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += uu_val; |
| 334 } 335 } 336 } 337 return CEED_ERROR_SUCCESS; 338} 339 340static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop, 341 CeedTransposeMode t_mode, const CeedScalar *uu, CeedScalar *vv) { --- 348 unchanged lines hidden (view full) --- 690} 691 692//------------------------------------------------------------------------------ 693// ElemRestriction Create 694//------------------------------------------------------------------------------ 695int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 696 const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 697 Ceed ceed; | 358 } 359 } 360 } 361 return CEED_ERROR_SUCCESS; 362} 363 364static inline int CeedElemRestrictionApplyAtPointsInElement_Ref_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, CeedInt stop, 365 CeedTransposeMode t_mode, const CeedScalar *uu, CeedScalar *vv) { --- 348 unchanged lines hidden (view full) --- 714} 715 716//------------------------------------------------------------------------------ 717// ElemRestriction Create 718//------------------------------------------------------------------------------ 719int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 720 const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 721 Ceed ceed; |
| 698 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; | 722 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride; |
| 699 CeedRestrictionType rstr_type; 700 CeedElemRestriction_Ref *impl; 701 702 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 703 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 704 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 705 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 706 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); --- 18 unchanged lines hidden (view full) --- 725 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 726 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 727 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 728 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 729 } 730 } 731 732 // Copy data | 723 CeedRestrictionType rstr_type; 724 CeedElemRestriction_Ref *impl; 725 726 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 727 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 728 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 729 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 730 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); --- 18 unchanged lines hidden (view full) --- 749 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 750 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 751 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 752 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 753 } 754 } 755 756 // Copy data |
| 733 if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 734 num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); | |
| 735 switch (copy_mode) { 736 case CEED_COPY_VALUES: | 757 switch (copy_mode) { 758 case CEED_COPY_VALUES: |
| 737 CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 738 memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); | 759 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 760 memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); |
| 739 impl->offsets = impl->offsets_allocated; 740 break; 741 case CEED_OWN_POINTER: 742 impl->offsets_allocated = (CeedInt *)offsets; 743 impl->offsets = impl->offsets_allocated; 744 break; 745 case CEED_USE_POINTER: 746 impl->offsets = offsets; 747 } 748 749 // Orientation data 750 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 751 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 752 switch (copy_mode) { 753 case CEED_COPY_VALUES: | 761 impl->offsets = impl->offsets_allocated; 762 break; 763 case CEED_OWN_POINTER: 764 impl->offsets_allocated = (CeedInt *)offsets; 765 impl->offsets = impl->offsets_allocated; 766 break; 767 case CEED_USE_POINTER: 768 impl->offsets = offsets; 769 } 770 771 // Orientation data 772 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 773 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 774 switch (copy_mode) { 775 case CEED_COPY_VALUES: |
| 754 CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 755 memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); | 776 CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orients_allocated)); 777 memcpy(impl->orients_allocated, orients, num_elem * elem_size * sizeof(orients[0])); |
| 756 impl->orients = impl->orients_allocated; 757 break; 758 case CEED_OWN_POINTER: 759 impl->orients_allocated = (bool *)orients; 760 impl->orients = impl->orients_allocated; 761 break; 762 case CEED_USE_POINTER: 763 impl->orients = orients; 764 } 765 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 766 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 767 switch (copy_mode) { 768 case CEED_COPY_VALUES: | 778 impl->orients = impl->orients_allocated; 779 break; 780 case CEED_OWN_POINTER: 781 impl->orients_allocated = (bool *)orients; 782 impl->orients = impl->orients_allocated; 783 break; 784 case CEED_USE_POINTER: 785 impl->orients = orients; 786 } 787 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 788 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 789 switch (copy_mode) { 790 case CEED_COPY_VALUES: |
| 769 CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 770 memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); | 791 CeedCallBackend(CeedMalloc(num_elem * 3 * elem_size, &impl->curl_orients_allocated)); 792 memcpy(impl->curl_orients_allocated, curl_orients, num_elem * 3 * elem_size * sizeof(curl_orients[0])); |
| 771 impl->curl_orients = impl->curl_orients_allocated; 772 break; 773 case CEED_OWN_POINTER: 774 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 775 impl->curl_orients = impl->curl_orients_allocated; 776 break; 777 case CEED_USE_POINTER: 778 impl->curl_orients = curl_orients; --- 71 unchanged lines hidden --- | 793 impl->curl_orients = impl->curl_orients_allocated; 794 break; 795 case CEED_OWN_POINTER: 796 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 797 impl->curl_orients = impl->curl_orients_allocated; 798 break; 799 case CEED_USE_POINTER: 800 impl->curl_orients = curl_orients; --- 71 unchanged lines hidden --- |