1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <stdbool.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include "ceed-memcheck.h" 15 16 //------------------------------------------------------------------------------ 17 // Set backend strides 18 //------------------------------------------------------------------------------ 19 static inline int CeedElemRestrictionGetBackendStrides_Memcheck(CeedElemRestriction rstr, CeedInt strides[3]) { 20 CeedInt elem_size, num_comp, num_elem; 21 22 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 23 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 24 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 25 // Memcheck default, contiguous by component, then node 26 strides[0] = num_comp; 27 strides[1] = 1; 28 strides[2] = num_comp * elem_size; 29 /** 30 // CPU default, contiguous by node, then component 31 strides[0] = 1; 32 strides[1] = elem_size; 33 strides[2] = elem_size * num_comp; 34 35 // GPU default, contiguous by node, then element 36 strides[0] = 1; 37 strides[1] = num_elem * elem_size; 38 strides[2] = elem_size; 39 **/ 40 return CEED_ERROR_SUCCESS; 41 } 42 43 //------------------------------------------------------------------------------ 44 // Core ElemRestriction Apply Code 45 //------------------------------------------------------------------------------ 46 static inline int CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 47 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 48 CeedSize v_offset, const CeedScalar *__restrict__ uu, 49 CeedScalar *__restrict__ vv) { 50 // Get strides 51 bool has_backend_strides; 52 CeedInt strides[3] = {0}; 53 54 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 55 if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides)); 56 else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 57 58 // Apply restriction 59 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 60 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 61 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 62 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 63 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 64 uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 65 } 66 } 67 } 68 } 69 return CEED_ERROR_SUCCESS; 70 } 71 72 static inline int CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 73 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 74 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 75 CeedScalar *__restrict__ vv) { 76 // Default restriction with offsets 77 CeedElemRestriction_Memcheck *impl; 78 79 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 80 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 81 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 82 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 83 vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = uu[impl->offsets[i + e * elem_size] + k * comp_stride]; 84 } 85 } 86 } 87 return CEED_ERROR_SUCCESS; 88 } 89 90 static inline int CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, 91 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 92 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, 93 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 94 // Restriction with orientations 95 CeedElemRestriction_Memcheck *impl; 96 97 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 98 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 99 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 100 CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * block_size; i++) { 101 vv[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + i - v_offset] = 102 uu[impl->offsets[i + e * elem_size] + k * comp_stride] * (impl->orients[i + e * elem_size] ? -1.0 : 1.0); 103 } 104 } 105 } 106 return CEED_ERROR_SUCCESS; 107 } 108 109 static inline int CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, 110 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 111 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, 112 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 113 // Restriction with tridiagonal transformation 114 CeedElemRestriction_Memcheck *impl; 115 116 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 117 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 118 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 119 CeedInt n = 0; 120 121 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 122 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 123 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 124 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 125 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 126 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 127 } 128 CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 129 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 130 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 131 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 132 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 133 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 134 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 135 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 136 impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]; 137 } 138 } 139 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 140 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 141 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 142 impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size] + 143 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 144 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 145 } 146 } 147 } 148 return CEED_ERROR_SUCCESS; 149 } 150 151 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core( 152 CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop, 153 CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 154 // Restriction with (unsigned) tridiagonal transformation 155 CeedElemRestriction_Memcheck *impl; 156 157 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 158 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 159 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 160 CeedInt n = 0; 161 162 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 163 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 164 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 165 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 166 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 167 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 168 } 169 CeedPragmaSIMD for (n = 1; n < elem_size - 1; n++) { 170 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 171 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 172 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 173 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 174 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 175 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 176 uu[impl->offsets[j + (n + 1) * block_size + e * elem_size] + k * comp_stride] * 177 abs(impl->curl_orients[j + (3 * n + 2) * block_size + e * 3 * elem_size]); 178 } 179 } 180 CeedPragmaSIMD for (CeedInt j = 0; j < block_size; j++) { 181 vv[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] = 182 uu[impl->offsets[j + (n - 1) * block_size + e * elem_size] + k * comp_stride] * 183 abs(impl->curl_orients[j + (3 * n + 0) * block_size + e * 3 * elem_size]) + 184 uu[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] * 185 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 186 } 187 } 188 } 189 return CEED_ERROR_SUCCESS; 190 } 191 192 static inline int CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 193 CeedInt start, CeedInt stop, CeedInt num_elem, CeedInt elem_size, 194 CeedSize v_offset, const CeedScalar *__restrict__ uu, 195 CeedScalar *__restrict__ vv) { 196 // Get strides 197 bool has_backend_strides; 198 CeedInt strides[3] = {0}; 199 200 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 201 if (has_backend_strides) CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, strides)); 202 else CeedCallBackend(CeedElemRestrictionGetStrides(rstr, strides)); 203 204 // Apply restriction 205 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 206 CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 207 CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 208 CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(block_size, num_elem - e); j++) { 209 vv[n * (CeedSize)strides[0] + k * (CeedSize)strides[1] + (e + j) * (CeedSize)strides[2]] += 210 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset]; 211 } 212 } 213 } 214 } 215 return CEED_ERROR_SUCCESS; 216 } 217 218 static inline int CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 219 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 220 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 221 CeedScalar *__restrict__ vv) { 222 // Default restriction with offsets 223 CeedElemRestriction_Memcheck *impl; 224 225 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 226 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 227 for (CeedInt k = 0; k < num_comp; k++) { 228 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 229 // Iteration bound set to discard padding elements 230 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 231 CeedScalar vv_loc; 232 233 vv_loc = uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset]; 234 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 235 } 236 } 237 } 238 } 239 return CEED_ERROR_SUCCESS; 240 } 241 242 static inline int CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 243 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedInt num_elem, 244 CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, 245 CeedScalar *__restrict__ vv) { 246 // Restriction with orientations 247 CeedElemRestriction_Memcheck *impl; 248 249 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 250 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 251 for (CeedInt k = 0; k < num_comp; k++) { 252 for (CeedInt i = 0; i < elem_size * block_size; i += block_size) { 253 // Iteration bound set to discard padding elements 254 for (CeedInt j = i; j < i + CeedIntMin(block_size, num_elem - e); j++) { 255 CeedScalar vv_loc; 256 257 vv_loc = 258 uu[elem_size * (k * (CeedSize)block_size + e * (CeedSize)num_comp) + j - v_offset] * (impl->orients[j + e * elem_size] ? -1.0 : 1.0); 259 CeedPragmaAtomic vv[impl->offsets[j + e * elem_size] + k * comp_stride] += vv_loc; 260 } 261 } 262 } 263 } 264 return CEED_ERROR_SUCCESS; 265 } 266 267 static inline int CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, 268 const CeedInt block_size, const CeedInt comp_stride, CeedInt start, 269 CeedInt stop, CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, 270 const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 271 // Restriction with tridiagonal transformation 272 CeedElemRestriction_Memcheck *impl; 273 CeedScalar vv_loc[block_size]; 274 275 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 276 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 277 for (CeedInt k = 0; k < num_comp; k++) { 278 // Iteration bound set to discard padding elements 279 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 280 CeedInt n = 0; 281 282 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 283 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 284 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 285 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 286 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 287 } 288 for (CeedInt j = 0; j < block_end; j++) { 289 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 290 } 291 for (n = 1; n < elem_size - 1; n++) { 292 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 293 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 294 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 295 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 296 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size] + 297 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 298 impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]; 299 } 300 for (CeedInt j = 0; j < block_end; j++) { 301 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 302 } 303 } 304 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 305 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 306 impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size] + 307 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 308 impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]; 309 } 310 for (CeedInt j = 0; j < block_end; j++) { 311 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 312 } 313 } 314 } 315 return CEED_ERROR_SUCCESS; 316 } 317 318 static inline int CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core( 319 CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, const CeedInt comp_stride, CeedInt start, CeedInt stop, 320 CeedInt num_elem, CeedInt elem_size, CeedSize v_offset, const CeedScalar *__restrict__ uu, CeedScalar *__restrict__ vv) { 321 // Restriction with (unsigned) tridiagonal transformation 322 CeedElemRestriction_Memcheck *impl; 323 CeedScalar vv_loc[block_size]; 324 325 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 326 for (CeedInt e = start * block_size; e < stop * block_size; e += block_size) { 327 for (CeedInt k = 0; k < num_comp; k++) { 328 // Iteration bound set to discard padding elements 329 const CeedInt block_end = CeedIntMin(block_size, num_elem - e); 330 CeedInt n = 0; 331 332 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 333 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 334 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 335 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 336 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 337 } 338 for (CeedInt j = 0; j < block_end; j++) { 339 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 340 } 341 for (n = 1; n < elem_size - 1; n++) { 342 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 343 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 344 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 345 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 346 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]) + 347 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n + 1) * block_size + j - v_offset] * 348 abs(impl->curl_orients[j + (3 * n + 3) * block_size + e * 3 * elem_size]); 349 } 350 for (CeedInt j = 0; j < block_end; j++) { 351 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 352 } 353 } 354 CeedPragmaSIMD for (CeedInt j = 0; j < block_end; j++) { 355 vv_loc[j] = uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n - 1) * block_size + j - v_offset] * 356 abs(impl->curl_orients[j + (3 * n - 1) * block_size + e * 3 * elem_size]) + 357 uu[e * elem_size * (CeedSize)num_comp + (k * (CeedSize)elem_size + n) * block_size + j - v_offset] * 358 abs(impl->curl_orients[j + (3 * n + 1) * block_size + e * 3 * elem_size]); 359 } 360 for (CeedInt j = 0; j < block_end; j++) { 361 CeedPragmaAtomic vv[impl->offsets[j + n * block_size + e * elem_size] + k * comp_stride] += vv_loc[j]; 362 } 363 } 364 } 365 return CEED_ERROR_SUCCESS; 366 } 367 368 static inline int CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, CeedInt start, 369 CeedInt stop, CeedTransposeMode t_mode, const CeedScalar *__restrict__ uu, 370 CeedScalar *__restrict__ vv) { 371 CeedInt num_points, l_vec_offset; 372 CeedSize e_vec_offset = 0; 373 CeedElemRestriction_Memcheck *impl; 374 375 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 376 for (CeedInt e = start; e < stop; e++) { 377 l_vec_offset = impl->offsets[e]; 378 CeedCallBackend(CeedElemRestrictionGetNumPointsInElement(rstr, e, &num_points)); 379 if (t_mode == CEED_NOTRANSPOSE) { 380 for (CeedInt i = 0; i < num_points; i++) { 381 for (CeedInt j = 0; j < num_comp; j++) vv[j * (CeedSize)num_points + i + e_vec_offset] = uu[impl->offsets[i + l_vec_offset] * num_comp + j]; 382 } 383 } else { 384 for (CeedInt i = 0; i < num_points; i++) { 385 for (CeedInt j = 0; j < num_comp; j++) vv[impl->offsets[i + l_vec_offset] * num_comp + j] = uu[j * (CeedSize)num_points + i + e_vec_offset]; 386 } 387 } 388 e_vec_offset += num_points * (CeedSize)num_comp; 389 } 390 return CEED_ERROR_SUCCESS; 391 } 392 393 static inline int CeedElemRestrictionApply_Memcheck_Core(CeedElemRestriction rstr, const CeedInt num_comp, const CeedInt block_size, 394 const CeedInt comp_stride, CeedInt start, CeedInt stop, CeedTransposeMode t_mode, 395 bool use_signs, bool use_orients, CeedVector u, CeedVector v, CeedRequest *request) { 396 CeedInt num_elem, elem_size; 397 CeedSize v_offset; 398 CeedRestrictionType rstr_type; 399 const CeedScalar *uu; 400 CeedScalar *vv; 401 402 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 403 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 404 v_offset = start * block_size * elem_size * (CeedSize)num_comp; 405 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 406 CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 407 408 if (t_mode == CEED_TRANSPOSE) { 409 // Sum into for transpose mode, E-vector to L-vector 410 CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 411 } else { 412 // Overwrite for notranspose mode, L-vector to E-vector 413 CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 414 } 415 416 if (t_mode == CEED_TRANSPOSE) { 417 // Restriction from E-vector to L-vector 418 // Performing v += r^T * u 419 // uu has shape [elem_size, num_comp, num_elem], row-major 420 // vv has shape [nnodes, num_comp] 421 // Sum into for transpose mode 422 switch (rstr_type) { 423 case CEED_RESTRICTION_STRIDED: 424 CeedCallBackend( 425 CeedElemRestrictionApplyStridedTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 426 break; 427 case CEED_RESTRICTION_STANDARD: 428 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 429 elem_size, v_offset, uu, vv)); 430 break; 431 case CEED_RESTRICTION_ORIENTED: 432 if (use_signs) { 433 CeedCallBackend(CeedElemRestrictionApplyOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 434 elem_size, v_offset, uu, vv)); 435 } else { 436 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 437 elem_size, v_offset, uu, vv)); 438 } 439 break; 440 case CEED_RESTRICTION_CURL_ORIENTED: 441 if (use_signs && use_orients) { 442 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 443 elem_size, v_offset, uu, vv)); 444 } else if (use_orients) { 445 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, 446 num_elem, elem_size, v_offset, uu, vv)); 447 } else { 448 CeedCallBackend(CeedElemRestrictionApplyOffsetTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 449 elem_size, v_offset, uu, vv)); 450 } 451 break; 452 case CEED_RESTRICTION_POINTS: 453 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 454 break; 455 } 456 } else { 457 // Restriction from L-vector to E-vector 458 // Perform: v = r * u 459 // vv has shape [elem_size, num_comp, num_elem], row-major 460 // uu has shape [nnodes, num_comp] 461 // Overwrite for notranspose mode 462 switch (rstr_type) { 463 case CEED_RESTRICTION_STRIDED: 464 CeedCallBackend( 465 CeedElemRestrictionApplyStridedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, start, stop, num_elem, elem_size, v_offset, uu, vv)); 466 break; 467 case CEED_RESTRICTION_STANDARD: 468 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 469 elem_size, v_offset, uu, vv)); 470 break; 471 case CEED_RESTRICTION_ORIENTED: 472 if (use_signs) { 473 CeedCallBackend(CeedElemRestrictionApplyOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 474 elem_size, v_offset, uu, vv)); 475 } else { 476 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 477 elem_size, v_offset, uu, vv)); 478 } 479 break; 480 case CEED_RESTRICTION_CURL_ORIENTED: 481 if (use_signs && use_orients) { 482 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, 483 num_elem, elem_size, v_offset, uu, vv)); 484 } else if (use_orients) { 485 CeedCallBackend(CeedElemRestrictionApplyCurlOrientedUnsignedNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, 486 num_elem, elem_size, v_offset, uu, vv)); 487 } else { 488 CeedCallBackend(CeedElemRestrictionApplyOffsetNoTranspose_Memcheck_Core(rstr, num_comp, block_size, comp_stride, start, stop, num_elem, 489 elem_size, v_offset, uu, vv)); 490 } 491 break; 492 case CEED_RESTRICTION_POINTS: 493 CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement_Memcheck_Core(rstr, num_comp, start, stop, t_mode, uu, vv)); 494 break; 495 } 496 } 497 CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 498 CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 499 if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 500 return CEED_ERROR_SUCCESS; 501 } 502 503 //------------------------------------------------------------------------------ 504 // ElemRestriction Apply 505 //------------------------------------------------------------------------------ 506 static int CeedElemRestrictionApply_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 507 CeedInt num_block, block_size, num_comp, comp_stride; 508 CeedElemRestriction_Memcheck *impl; 509 510 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 511 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 512 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 513 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 514 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 515 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, true, true, u, v, request)); 516 return CEED_ERROR_SUCCESS; 517 } 518 519 //------------------------------------------------------------------------------ 520 // ElemRestriction Apply Unsigned 521 //------------------------------------------------------------------------------ 522 static int CeedElemRestrictionApplyUnsigned_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 523 CeedRequest *request) { 524 CeedInt num_block, block_size, num_comp, comp_stride; 525 CeedElemRestriction_Memcheck *impl; 526 527 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 528 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 529 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 530 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 531 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 532 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, true, u, v, request)); 533 return CEED_ERROR_SUCCESS; 534 } 535 536 //------------------------------------------------------------------------------ 537 // ElemRestriction Apply Unoriented 538 //------------------------------------------------------------------------------ 539 static int CeedElemRestrictionApplyUnoriented_Memcheck(CeedElemRestriction rstr, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 540 CeedRequest *request) { 541 CeedInt num_block, block_size, num_comp, comp_stride; 542 CeedElemRestriction_Memcheck *impl; 543 544 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 545 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 546 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 547 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 548 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 549 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, 0, num_block, t_mode, false, false, u, v, request)); 550 return CEED_ERROR_SUCCESS; 551 } 552 553 //------------------------------------------------------------------------------ 554 // ElemRestriction Apply Points 555 //------------------------------------------------------------------------------ 556 static int CeedElemRestrictionApplyAtPointsInElement_Memcheck(CeedElemRestriction rstr, CeedInt elem, CeedTransposeMode t_mode, CeedVector u, 557 CeedVector v, CeedRequest *request) { 558 CeedInt num_comp; 559 CeedElemRestriction_Memcheck *impl; 560 561 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 562 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 563 return impl->Apply(rstr, num_comp, 0, 1, elem, elem + 1, t_mode, false, false, u, v, request); 564 } 565 566 //------------------------------------------------------------------------------ 567 // ElemRestriction Apply Block 568 //------------------------------------------------------------------------------ 569 static int CeedElemRestrictionApplyBlock_Memcheck(CeedElemRestriction rstr, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 570 CeedRequest *request) { 571 CeedInt block_size, num_comp, comp_stride; 572 CeedElemRestriction_Memcheck *impl; 573 574 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 575 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 576 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 577 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 578 CeedCallBackend(impl->Apply(rstr, num_comp, block_size, comp_stride, block, block + 1, t_mode, true, true, u, v, request)); 579 return CEED_ERROR_SUCCESS; 580 } 581 582 //------------------------------------------------------------------------------ 583 // ElemRestriction Get Offsets 584 //------------------------------------------------------------------------------ 585 static int CeedElemRestrictionGetOffsets_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 586 CeedElemRestriction_Memcheck *impl; 587 588 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 589 590 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 591 592 *offsets = impl->offsets; 593 return CEED_ERROR_SUCCESS; 594 } 595 596 //------------------------------------------------------------------------------ 597 // ElemRestriction Get Orientations 598 //------------------------------------------------------------------------------ 599 static int CeedElemRestrictionGetOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const bool **orients) { 600 CeedElemRestriction_Memcheck *impl; 601 602 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 603 604 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 605 606 *orients = impl->orients; 607 return CEED_ERROR_SUCCESS; 608 } 609 610 //------------------------------------------------------------------------------ 611 // ElemRestriction Get Curl-Conforming Orientations 612 //------------------------------------------------------------------------------ 613 static int CeedElemRestrictionGetCurlOrientations_Memcheck(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt8 **curl_orients) { 614 CeedElemRestriction_Memcheck *impl; 615 616 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 617 618 CeedCheck(mem_type == CEED_MEM_HOST, CeedElemRestrictionReturnCeed(rstr), CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 619 620 *curl_orients = impl->curl_orients; 621 return CEED_ERROR_SUCCESS; 622 } 623 624 //------------------------------------------------------------------------------ 625 // ElemRestriction Destroy 626 //------------------------------------------------------------------------------ 627 static int CeedElemRestrictionDestroy_Memcheck(CeedElemRestriction rstr) { 628 CeedElemRestriction_Memcheck *impl; 629 630 CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 631 CeedCallBackend(CeedFree(&impl->offsets_allocated)); 632 CeedCallBackend(CeedFree(&impl->orients_allocated)); 633 CeedCallBackend(CeedFree(&impl->curl_orients_allocated)); 634 CeedCallBackend(CeedFree(&impl)); 635 return CEED_ERROR_SUCCESS; 636 } 637 638 //------------------------------------------------------------------------------ 639 // ElemRestriction Create 640 //------------------------------------------------------------------------------ 641 int CeedElemRestrictionCreate_Memcheck(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orients, 642 const CeedInt8 *curl_orients, CeedElemRestriction rstr) { 643 Ceed ceed; 644 CeedInt num_elem, elem_size, num_block, block_size, num_comp, comp_stride, num_points = 0, num_offsets; 645 CeedRestrictionType rstr_type; 646 CeedElemRestriction_Memcheck *impl; 647 648 CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 649 CeedCallBackend(CeedElemRestrictionGetNumElements(rstr, &num_elem)); 650 CeedCallBackend(CeedElemRestrictionGetElementSize(rstr, &elem_size)); 651 CeedCallBackend(CeedElemRestrictionGetNumBlocks(rstr, &num_block)); 652 CeedCallBackend(CeedElemRestrictionGetBlockSize(rstr, &block_size)); 653 CeedCallBackend(CeedElemRestrictionGetNumComponents(rstr, &num_comp)); 654 CeedCallBackend(CeedElemRestrictionGetCompStride(rstr, &comp_stride)); 655 CeedCallBackend(CeedElemRestrictionGetType(rstr, &rstr_type)); 656 657 CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 658 659 CeedCallBackend(CeedCalloc(1, &impl)); 660 CeedCallBackend(CeedElemRestrictionSetData(rstr, impl)); 661 662 // Set layouts 663 { 664 bool has_backend_strides; 665 CeedInt e_layout[3] = {1, elem_size, elem_size * num_comp}, l_layout[3] = {0}; 666 667 CeedCallBackend(CeedElemRestrictionSetELayout(rstr, e_layout)); 668 if (rstr_type == CEED_RESTRICTION_STRIDED) { 669 CeedCallBackend(CeedElemRestrictionHasBackendStrides(rstr, &has_backend_strides)); 670 if (has_backend_strides) { 671 CeedCallBackend(CeedElemRestrictionGetBackendStrides_Memcheck(rstr, l_layout)); 672 CeedCallBackend(CeedElemRestrictionSetLLayout(rstr, l_layout)); 673 } 674 } 675 } 676 677 // Offsets data 678 if (rstr_type != CEED_RESTRICTION_STRIDED) { 679 const char *resource; 680 681 // Check indices for ref or memcheck backends 682 { 683 Ceed current = ceed, parent = NULL; 684 685 CeedCallBackend(CeedGetParent(current, &parent)); 686 while (current != parent) { 687 current = parent; 688 CeedCallBackend(CeedGetParent(current, &parent)); 689 } 690 CeedCallBackend(CeedGetResource(parent, &resource)); 691 } 692 if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 693 !strcmp(resource, "/cpu/self/memcheck/blocked")) { 694 CeedSize l_size; 695 696 CeedCallBackend(CeedElemRestrictionGetLVectorSize(rstr, &l_size)); 697 for (CeedInt i = 0; i < num_elem * elem_size; i++) { 698 CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 699 "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 700 } 701 } 702 703 // Copy data 704 if (rstr_type == CEED_RESTRICTION_POINTS) CeedCallBackend(CeedElemRestrictionGetNumPoints(rstr, &num_points)); 705 num_offsets = rstr_type == CEED_RESTRICTION_POINTS ? (num_elem + 1 + num_points) : (num_elem * elem_size); 706 switch (copy_mode) { 707 case CEED_COPY_VALUES: 708 CeedCallBackend(CeedMalloc(num_offsets, &impl->offsets_allocated)); 709 memcpy(impl->offsets_allocated, offsets, num_offsets * sizeof(offsets[0])); 710 impl->offsets = impl->offsets_allocated; 711 break; 712 case CEED_OWN_POINTER: 713 impl->offsets_allocated = (CeedInt *)offsets; 714 impl->offsets = impl->offsets_allocated; 715 break; 716 case CEED_USE_POINTER: 717 impl->offsets = offsets; 718 } 719 720 // Orientation data 721 if (rstr_type == CEED_RESTRICTION_ORIENTED) { 722 CeedCheck(orients != NULL, ceed, CEED_ERROR_BACKEND, "No orients array provided for oriented restriction"); 723 switch (copy_mode) { 724 case CEED_COPY_VALUES: 725 CeedCallBackend(CeedMalloc(num_offsets, &impl->orients_allocated)); 726 memcpy(impl->orients_allocated, orients, num_offsets * sizeof(orients[0])); 727 impl->orients = impl->orients_allocated; 728 break; 729 case CEED_OWN_POINTER: 730 impl->orients_allocated = (bool *)orients; 731 impl->orients = impl->orients_allocated; 732 break; 733 case CEED_USE_POINTER: 734 impl->orients = orients; 735 } 736 } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) { 737 CeedCheck(curl_orients != NULL, ceed, CEED_ERROR_BACKEND, "No curl_orients array provided for oriented restriction"); 738 switch (copy_mode) { 739 case CEED_COPY_VALUES: 740 CeedCallBackend(CeedMalloc(3 * num_offsets, &impl->curl_orients_allocated)); 741 memcpy(impl->curl_orients_allocated, curl_orients, 3 * num_offsets * sizeof(curl_orients[0])); 742 impl->curl_orients = impl->curl_orients_allocated; 743 break; 744 case CEED_OWN_POINTER: 745 impl->curl_orients_allocated = (CeedInt8 *)curl_orients; 746 impl->curl_orients = impl->curl_orients_allocated; 747 break; 748 case CEED_USE_POINTER: 749 impl->curl_orients = curl_orients; 750 } 751 } 752 } 753 754 // Set apply function 755 impl->Apply = CeedElemRestrictionApply_Memcheck_Core; 756 757 // Register backend functions 758 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Apply", CeedElemRestrictionApply_Memcheck)); 759 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Memcheck)); 760 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyUnoriented", CeedElemRestrictionApplyUnoriented_Memcheck)); 761 if (rstr_type == CEED_RESTRICTION_POINTS) { 762 CeedCallBackend( 763 CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyAtPointsInElement", CeedElemRestrictionApplyAtPointsInElement_Memcheck)); 764 } 765 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "ApplyBlock", CeedElemRestrictionApplyBlock_Memcheck)); 766 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOffsets", CeedElemRestrictionGetOffsets_Memcheck)); 767 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetOrientations", CeedElemRestrictionGetOrientations_Memcheck)); 768 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "GetCurlOrientations", CeedElemRestrictionGetCurlOrientations_Memcheck)); 769 CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", rstr, "Destroy", CeedElemRestrictionDestroy_Memcheck)); 770 return CEED_ERROR_SUCCESS; 771 } 772 773 //------------------------------------------------------------------------------ 774