// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

#include <ceed.h>
#include <ceed/backend.h>
#include <ceed/jit-tools.h>
#include <string.h>

#ifdef CEED_MAGMA_USE_HIP
#include "../hip/ceed-hip-common.h"
#include "../hip/ceed-hip-compile.h"
#else
#include "../cuda/ceed-cuda-common.h"
#include "../cuda/ceed-cuda-compile.h"
#endif
#include "ceed-magma-common.h"
#include "ceed-magma.h"

#include "ceed-magma-gemm-nontensor.h"
#include "ceed-magma-gemm-selector.h"

//------------------------------------------------------------------------------
// Basis apply - tensor
//
// Applies a tensor-product basis operation (interp, grad, or quadrature
// weights) on the device, dispatching one pre-compiled MAGMA kernel per
// (e_mode, dim) combination. u may be CEED_VECTOR_NONE only for
// CEED_EVAL_WEIGHT; v always receives the result. Synchronizes the MAGMA
// queue before returning so results are complete.
//------------------------------------------------------------------------------
static int CeedBasisApply_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u, CeedVector v) {
  Ceed              ceed;
  Ceed_Magma       *data;
  CeedInt           dim, num_comp, num_nodes, P_1d, Q_1d, P, Q;
  const CeedScalar *d_u;
  CeedScalar       *d_v;
  CeedBasis_Magma  *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
  CeedCallBackend(CeedGetData(ceed, &data));
  CeedCallBackend(CeedBasisGetData(basis, &impl));
  CeedCallBackend(CeedBasisGetDimension(basis, &dim));
  CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
  CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
  CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d));
  CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
  // P/Q are the effective 1D input/output sizes used for launch sizing below;
  // in transpose mode the roles of nodes and quadrature points swap
  P = P_1d;
  Q = Q_1d;
  if (t_mode == CEED_TRANSPOSE) {
    P = Q_1d;
    Q = P_1d;
  }

  // Read vectors (no input vector exists for CEED_EVAL_WEIGHT)
  if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
  else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
  CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

  // Apply basis operation
  switch (e_mode) {
    case CEED_EVAL_INTERP: {
      // Define element sizes for dofs/quad
      CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim);
      CeedInt elem_dofs_size = CeedIntPow(P_1d, dim);

      // E-vector ordering -------------- Q-vector ordering
      //  component                        component
      //    elem                             elem
      //      node                             node

      // --- Define strides for NOTRANSPOSE mode: ---
      // Input (d_u) is E-vector, output (d_v) is Q-vector

      // Element strides
      CeedInt u_elem_stride = elem_dofs_size;
      CeedInt v_elem_stride = elem_qpts_size;
      // Component strides
      CeedInt u_comp_stride = num_elem * elem_dofs_size;
      CeedInt v_comp_stride = num_elem * elem_qpts_size;
      if (t_mode == CEED_TRANSPOSE) {
        // Input (d_u) is Q-vector, output (d_v) is E-vector
        // Element strides
        v_elem_stride = elem_dofs_size;
        u_elem_stride = elem_qpts_size;
        // Component strides
        v_comp_stride = num_elem * elem_dofs_size;
        u_comp_stride = num_elem * elem_qpts_size;
      }
      // Launch configuration: one thread block handles num_t_col elements;
      // shared memory sizes below must match the JIT'd kernel's expectations
      CeedInt num_threads = 1;
      CeedInt num_t_col   = 1;
      CeedInt shared_mem  = 0;
      CeedInt max_P_Q     = CeedIntMax(P, Q);

      switch (dim) {
        case 1:
          num_threads = max_P_Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
          shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q));
          shared_mem += sizeof(CeedScalar) * (P * Q);
          break;
        case 2:
          num_threads = max_P_Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
          shared_mem += P * Q * sizeof(CeedScalar);  // for sT
          // for reforming rU we need P x P, and for the intermediate output we need P x Q
          shared_mem += num_t_col * (P * max_P_Q * sizeof(CeedScalar));
          break;
        case 3:
          num_threads = max_P_Q * max_P_Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
          shared_mem += sizeof(CeedScalar) * (P * Q);  // for sT
          // rU needs P^2 x P, the intermediate output needs max(P^2 x Q, P x Q^2)
          shared_mem += sizeof(CeedScalar) * num_t_col * (CeedIntMax(P * P * max_P_Q, P * Q * Q));
          break;
      }
      CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
      void   *args[] = {&impl->d_interp_1d, &d_u, &u_elem_stride, &u_comp_stride, &d_v, &v_elem_stride, &v_comp_stride, &num_elem};

      if (t_mode == CEED_TRANSPOSE) {
        CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->InterpTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
      } else {
        CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Interp, grid, num_threads, num_t_col, 1, shared_mem, args));
      }
    } break;
    case CEED_EVAL_GRAD: {
      // Define element sizes for dofs/quad
      CeedInt elem_qpts_size = CeedIntPow(Q_1d, dim);
      CeedInt elem_dofs_size = CeedIntPow(P_1d, dim);

      // In CEED_NOTRANSPOSE mode:
      // d_u is (P^dim x nc), column-major layout (nc = num_comp)
      // d_v is (Q^dim x nc x dim), column-major layout (nc = num_comp)
      // In CEED_TRANSPOSE mode, the sizes of d_u and d_v are switched.

      // E-vector ordering -------------- Q-vector ordering
      //                                  dim
      //  component                        component
      //    elem                             elem
      //      node                             node

      // --- Define strides for NOTRANSPOSE mode: ---
      // Input (d_u) is E-vector, output (d_v) is Q-vector

      // Element strides
      CeedInt u_elem_stride = elem_dofs_size;
      CeedInt v_elem_stride = elem_qpts_size;
      // Component strides
      CeedInt u_comp_stride = num_elem * elem_dofs_size;
      CeedInt v_comp_stride = num_elem * elem_qpts_size;
      // Dimension strides (the E-vector has no dim axis, hence stride 0 on that side)
      CeedInt u_dim_stride = 0;
      CeedInt v_dim_stride = num_elem * elem_qpts_size * num_comp;
      if (t_mode == CEED_TRANSPOSE) {
        // Input (d_u) is Q-vector, output (d_v) is E-vector
        // Element strides
        v_elem_stride = elem_dofs_size;
        u_elem_stride = elem_qpts_size;
        // Component strides
        v_comp_stride = num_elem * elem_dofs_size;
        u_comp_stride = num_elem * elem_qpts_size;
        // Dimension strides
        v_dim_stride = 0;
        u_dim_stride = num_elem * elem_qpts_size * num_comp;
      }
      CeedInt num_threads = 1;
      CeedInt num_t_col   = 1;
      CeedInt shared_mem  = 0;
      CeedInt max_P_Q     = CeedIntMax(P, Q);

      switch (dim) {
        case 1:
          num_threads = max_P_Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
          shared_mem += sizeof(CeedScalar) * num_t_col * (num_comp * (1 * P + 1 * Q));
          shared_mem += sizeof(CeedScalar) * (P * Q);
          break;
        case 2:
          num_threads = max_P_Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
          shared_mem += sizeof(CeedScalar) * 2 * P * Q;  // for sTinterp and sTgrad
          // for reforming rU we need P x P, and for the intermediate output we need P x Q
          shared_mem += sizeof(CeedScalar) * num_t_col * (P * max_P_Q);
          break;
        case 3:
          num_threads = max_P_Q * max_P_Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
          shared_mem += sizeof(CeedScalar) * 2 * P * Q;  // for sTinterp and sTgrad
          // rU needs P^2 x P, the intermediate outputs need (P^2 x Q + P x Q^2)
          shared_mem += sizeof(CeedScalar) * num_t_col * CeedIntMax(P * P * P, (P * P * Q) + (P * Q * Q));
          break;
      }
      CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
      void   *args[] = {&impl->d_interp_1d, &impl->d_grad_1d, &d_u, &u_elem_stride, &u_comp_stride, &u_dim_stride, &d_v,
                        &v_elem_stride, &v_comp_stride, &v_dim_stride, &num_elem};

      if (t_mode == CEED_TRANSPOSE) {
        CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->GradTranspose, grid, num_threads, num_t_col, 1, shared_mem, args));
      } else {
        CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Grad, grid, num_threads, num_t_col, 1, shared_mem, args));
      }
    } break;
    case CEED_EVAL_WEIGHT: {
      CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
      CeedCheck(impl->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weight_1d not set", CeedEvalModes[e_mode]);
      // Q == Q_1d here (transpose was rejected above), so despite the name this
      // is the Q^dim quadrature-point count per element
      CeedInt elem_dofs_size = CeedIntPow(Q, dim);
      CeedInt num_threads    = 1;
      CeedInt num_t_col      = 1;
      CeedInt shared_mem     = 0;

      switch (dim) {
        case 1:
          num_threads = Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_1D);
          shared_mem += sizeof(CeedScalar) * Q;              // for d_q_weight_1d
          shared_mem += sizeof(CeedScalar) * num_t_col * Q;  // for output
          break;
        case 2:
          num_threads = Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_2D);
          shared_mem += sizeof(CeedScalar) * Q;  // for d_q_weight_1d
          break;
        case 3:
          num_threads = Q * Q;
          num_t_col   = MAGMA_BASIS_NTCOL(num_threads, MAGMA_MAXTHREADS_3D);
          shared_mem += sizeof(CeedScalar) * Q;  // for d_q_weight_1d
          break;
      }
      CeedInt grid   = CeedDivUpInt(num_elem, num_t_col);
      void   *args[] = {&impl->d_q_weight_1d, &d_v, &elem_dofs_size, &num_elem};

      CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, grid, num_threads, num_t_col, 1, shared_mem, args));
    } break;
    // LCOV_EXCL_START
    case CEED_EVAL_DIV:
    case CEED_EVAL_CURL:
      return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[e_mode]);
    case CEED_EVAL_NONE:
      return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_NONE does not make sense in this context");
    // LCOV_EXCL_STOP
  }

  // Must sync to ensure completeness
  ceed_magma_queue_sync(data->queue);

  // Restore vectors
  if (e_mode != CEED_EVAL_WEIGHT) {
    CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
  }
  CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Basis apply - tensor AtPoints
//
// Not implemented for the MAGMA backend; always returns a backend error.
//------------------------------------------------------------------------------
int CeedBasisApplyAtPoints_Magma(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
                                 CeedVector x_ref, CeedVector u, CeedVector v) {
  return CeedError(CeedBasisReturnCeed(basis), CEED_ERROR_BACKEND, "Backend does not implement CeedBasisApplyAtPoints");
}

//------------------------------------------------------------------------------
// Basis apply - non-tensor
//
// Uses custom RTC-compiled kernels (tuned per problem size) when P and Q are
// small enough, and falls back to batched GEMM via magma_gemm_nontensor
// otherwise. Kernel compilation for a given N instance is done lazily on
// first use (see the compile section in the body).
//------------------------------------------------------------------------------
static int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode e_mode, CeedVector u,
                                         CeedVector v) {
  Ceed                      ceed;
  Ceed_Magma               *data;
  CeedInt                   num_comp, num_nodes, num_qpts, P, Q, N;
  const CeedScalar         *d_u;
  CeedScalar               *d_v;
  CeedBasisNonTensor_Magma *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
  CeedCallBackend(CeedGetData(ceed, &data));
  CeedCallBackend(CeedBasisGetData(basis, &impl));
  CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
  CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
  CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
  P = num_nodes;
  Q = num_qpts;
  // N is the total number of columns in the batched GEMM view of the problem
  N = num_elem * num_comp;

  // Read vectors (no input vector exists for CEED_EVAL_WEIGHT)
  if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
  else CeedCheck(e_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
  CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));

  // Compile kernels for N as needed
  // iN selects the pre-tuned kernel instance whose tuned N value is closest to
  // the actual N; kernels for that instance are compiled lazily, once
  CeedInt iN = 0;
  if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q && (e_mode != CEED_EVAL_WEIGHT || !impl->Weight)) {
    CeedInt n_array[MAGMA_NONTENSOR_KERNEL_INSTANCES] = {MAGMA_NONTENSOR_KERNEL_N_VALUES};
    CeedInt diff = abs(n_array[iN] - N), idiff;

    // Linear scan for the instance minimizing |n_array[in] - N|
    for (CeedInt in = iN + 1; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) {
      idiff = abs(n_array[in] - N);
      if (idiff < diff) {
        iN   = in;
        diff = idiff;
      }
    }

    // NB_interp[iN] == 0 means this instance has not been compiled yet
    if (!impl->NB_interp[iN]) {
      CeedFESpace fe_space;
      CeedInt     q_comp_interp, q_comp_deriv;
      Ceed        ceed_delegate;
      char       *basis_kernel_source;
      const char *basis_kernel_path, *weight_kernel_path;
      char      **file_paths     = NULL;
      CeedInt     num_file_paths = 0;
      magma_int_t arch           = magma_getdevice_arch();

      // Tuning parameters for NB (blocking factor per kernel variant)
      CeedCallBackend(CeedBasisGetFESpace(basis, &fe_space));
      CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
      // The "deriv" kernel covers grad/div/curl depending on the FE space
      switch (fe_space) {
        case CEED_FE_SPACE_H1:
          CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_deriv));
          break;
        case CEED_FE_SPACE_HDIV:
          CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_deriv));
          break;
        case CEED_FE_SPACE_HCURL:
          CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_deriv));
          break;
      }
      impl->NB_interp[iN]   = nontensor_rtc_get_nb(arch, 'n', q_comp_interp, P, Q, n_array[iN]);
      impl->NB_interp_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_interp, P, Q, n_array[iN]);
      impl->NB_deriv[iN]    = nontensor_rtc_get_nb(arch, 'n', q_comp_deriv, P, Q, n_array[iN]);
      impl->NB_deriv_t[iN]  = nontensor_rtc_get_nb(arch, 't', q_comp_deriv, P, Q, n_array[iN]);

      // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
      CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));

      // Compile kernels
      CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-interp-deriv-nontensor.h", &basis_kernel_path));
      CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
      CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, basis_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source));
      // The weight kernel is appended only if it was not already compiled at
      // basis creation time
      if (!impl->Weight) {
        CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
        CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, weight_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source));
      }
      CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
      CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[iN], 8, "BASIS_Q_COMP_INTERP", q_comp_interp,
                                       "BASIS_Q_COMP_DERIV", q_comp_deriv, "BASIS_P", P, "BASIS_Q", Q, "BASIS_NB_INTERP_N", impl->NB_interp[iN],
                                       "BASIS_NB_INTERP_T", impl->NB_interp_t[iN], "BASIS_NB_DERIV_N", impl->NB_deriv[iN], "BASIS_NB_DERIV_T",
                                       impl->NB_deriv_t[iN]));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_n", &impl->Interp[iN]));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_interp_nontensor_t", &impl->InterpTranspose[iN]));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_n", &impl->Deriv[iN]));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_deriv_nontensor_t", &impl->DerivTranspose[iN]));
      if (!impl->Weight) {
        CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[iN], "magma_weight_nontensor", &impl->Weight));
        CeedCallBackend(CeedFree(&weight_kernel_path));
      }
      CeedCallBackend(CeedFree(&basis_kernel_path));
      CeedCallBackend(CeedFree(&basis_kernel_source));
      for (CeedInt i = 0; i < num_file_paths; i++) CeedCall(CeedFree(&file_paths[i]));
      CeedCall(CeedFree(&file_paths));
    }
  }

  // Apply basis operation
  if (e_mode != CEED_EVAL_WEIGHT) {
    const CeedScalar *d_b = NULL;
    CeedInt           q_comp, NB, M, K;
    CeedMagmaFunction Kernel;

    // Select the device basis matrix for this eval mode
    switch (e_mode) {
      case CEED_EVAL_INTERP:
        d_b = impl->d_interp;
        break;
      case CEED_EVAL_GRAD:
        d_b = impl->d_grad;
        break;
      case CEED_EVAL_DIV:
        d_b = impl->d_div;
        break;
      case CEED_EVAL_CURL:
        d_b = impl->d_curl;
        break;
      // LCOV_EXCL_START
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_NONE:
        return CeedError(ceed, CEED_ERROR_BACKEND, "%s does not make sense in this context", CeedEvalModes[e_mode]);
      // LCOV_EXCL_STOP
    }
    CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, e_mode, &q_comp));
    // GEMM-style sizes: M rows of output, K rows of input per column
    M = (t_mode == CEED_TRANSPOSE) ? P : Q, K = (t_mode == CEED_TRANSPOSE) ? Q : P;

    if (P <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P && Q <= MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
      // Custom kernel path (kernels for instance iN were compiled above)
      if (e_mode == CEED_EVAL_INTERP) {
        if (t_mode == CEED_TRANSPOSE) {
          Kernel = impl->InterpTranspose[iN];
          NB     = impl->NB_interp_t[iN];
        } else {
          Kernel = impl->Interp[iN];
          NB     = impl->NB_interp[iN];
        }
      } else {
        if (t_mode == CEED_TRANSPOSE) {
          Kernel = impl->DerivTranspose[iN];
          NB     = impl->NB_deriv_t[iN];
        } else {
          Kernel = impl->Deriv[iN];
          NB     = impl->NB_deriv[iN];
        }
      }
      CeedInt num_t_col    = MAGMA_BASIS_NTCOL(M, MAGMA_MAXTHREADS_1D);
      CeedInt grid         = CeedDivUpInt(N, num_t_col * NB);
      CeedInt shared_mem_A = P * Q * sizeof(CeedScalar);
      CeedInt shared_mem_B = num_t_col * K * NB * sizeof(CeedScalar);
      // Multi-component notranspose kernels need both buffers simultaneously;
      // otherwise the two usages do not overlap and the max suffices
      CeedInt shared_mem   = (t_mode != CEED_TRANSPOSE && q_comp > 1) ? (shared_mem_A + shared_mem_B) : CeedIntMax(shared_mem_A, shared_mem_B);
      void   *args[]       = {&N, &d_b, &d_u, &d_v};

      CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, Kernel, grid, M, num_t_col, 1, shared_mem, args));
    } else {
      // GEMM fallback: one multiply per quadrature component
      for (CeedInt d = 0; d < q_comp; d++) {
        if (t_mode == CEED_TRANSPOSE) {
          // beta = 1 for d > 0 accumulates each component's contribution into d_v
          const CeedScalar beta = (d > 0) ? 1.0 : 0.0;
          magma_gemm_nontensor(MagmaNoTrans, MagmaNoTrans, P, N, Q, 1.0, d_b + d * P * Q, P, d_u + d * N * Q, Q, beta, d_v, P, data->queue);
        } else {
          magma_gemm_nontensor(MagmaTrans, MagmaNoTrans, Q, N, P, 1.0, d_b + d * P * Q, P, d_u, P, 0.0, d_v + d * N * Q, Q, data->queue);
        }
      }
    }
  } else {
    CeedCheck(t_mode != CEED_TRANSPOSE, ceed, CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
    CeedCheck(impl->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weight not set", CeedEvalModes[e_mode]);
    CeedInt num_t_col  = MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D);
    CeedInt grid       = CeedDivUpInt(num_elem, num_t_col);
    CeedInt shared_mem = Q * sizeof(CeedScalar) + num_t_col * Q * sizeof(CeedScalar);
    void   *args[]     = {&num_elem, &impl->d_q_weight, &d_v};

    CeedCallBackend(CeedRunKernelDimSharedMagma(ceed, impl->Weight, grid, Q, num_t_col, 1, shared_mem, args));
  }

  // Must sync to ensure completeness
  ceed_magma_queue_sync(data->queue);

  // Restore vectors
  if (e_mode != CEED_EVAL_WEIGHT) {
    CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
  }
  CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Destroy tensor basis
//
// Unloads the JIT module and frees all device-side basis data.
//------------------------------------------------------------------------------
static int CeedBasisDestroy_Magma(CeedBasis basis) {
  Ceed             ceed;
  CeedBasis_Magma *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
  CeedCallBackend(CeedBasisGetData(basis, &impl));
#ifdef CEED_MAGMA_USE_HIP
  CeedCallHip(ceed, hipModuleUnload(impl->module));
#else
  CeedCallCuda(ceed, cuModuleUnload(impl->module));
#endif
  CeedCallBackend(magma_free(impl->d_interp_1d));
  CeedCallBackend(magma_free(impl->d_grad_1d));
  // q_weight_1d is optional (only allocated when weights were provided)
  if (impl->d_q_weight_1d)
CeedCallBackend(magma_free(impl->d_q_weight_1d));
  CeedCallBackend(CeedFree(&impl));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Destroy non-tensor basis
//
// Unloads every lazily-compiled module instance and frees all device-side
// basis matrices (interp/grad/div/curl) and optional quadrature weights.
//------------------------------------------------------------------------------
static int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
  Ceed                      ceed;
  CeedBasisNonTensor_Magma *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
  CeedCallBackend(CeedBasisGetData(basis, &impl));
  // Only instances that were actually compiled have a non-NULL module
  for (CeedInt in = 0; in < MAGMA_NONTENSOR_KERNEL_INSTANCES; in++) {
    if (impl->module[in]) {
#ifdef CEED_MAGMA_USE_HIP
      CeedCallHip(ceed, hipModuleUnload(impl->module[in]));
#else
      CeedCallCuda(ceed, cuModuleUnload(impl->module[in]));
#endif
    }
  }
  CeedCallBackend(magma_free(impl->d_interp));
  CeedCallBackend(magma_free(impl->d_grad));
  CeedCallBackend(magma_free(impl->d_div));
  CeedCallBackend(magma_free(impl->d_curl));
  if (impl->d_q_weight) CeedCallBackend(magma_free(impl->d_q_weight));
  CeedCallBackend(CeedFree(&impl));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Create tensor
//
// Uploads 1D basis data to the device, JIT-compiles the dimension-specific
// interp/grad/weight kernels, and registers the tensor Apply/Destroy
// backend functions.
//------------------------------------------------------------------------------
int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
                                  const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
  Ceed             ceed, ceed_delegate;
  Ceed_Magma      *data;
  char            *basis_kernel_source;
  const char      *interp_kernel_path, *grad_kernel_path, *weight_kernel_path;
  char           **file_paths     = NULL;
  CeedInt          num_file_paths = 0;
  CeedInt          num_comp;
  CeedBasis_Magma *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
  CeedCallBackend(CeedGetData(ceed, &data));
  CeedCallBackend(CeedCalloc(1, &impl));

  // Copy basis data to GPU
  if (q_weight_1d) {
    CeedCallBackend(magma_malloc((void **)&impl->d_q_weight_1d, Q_1d * sizeof(q_weight_1d[0])));
    magma_setvector(Q_1d, sizeof(q_weight_1d[0]), q_weight_1d, 1, impl->d_q_weight_1d, 1, data->queue);
  }
  CeedCallBackend(magma_malloc((void **)&impl->d_interp_1d, Q_1d * P_1d * sizeof(interp_1d[0])));
  magma_setvector(Q_1d * P_1d, sizeof(interp_1d[0]), interp_1d, 1, impl->d_interp_1d, 1, data->queue);
  CeedCallBackend(magma_malloc((void **)&impl->d_grad_1d, Q_1d * P_1d * sizeof(grad_1d[0])));
  magma_setvector(Q_1d * P_1d, sizeof(grad_1d[0]), grad_1d, 1, impl->d_grad_1d, 1, data->queue);

  // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
  CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));

  // Compile kernels
  CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
  {
    char   *interp_kernel_name_base = "ceed/jit-source/magma/magma-basis-interp";
    // +6 holds the "-<dim>d.h" suffix plus NUL for a single-digit dim
    // (the switch below handles dim 1-3 only)
    CeedInt interp_kernel_name_len  = strlen(interp_kernel_name_base) + 6;
    char    interp_kernel_name[interp_kernel_name_len];

    snprintf(interp_kernel_name, interp_kernel_name_len, "%s-%" CeedInt_FMT "d.h", interp_kernel_name_base, dim);
    CeedCallBackend(CeedGetJitAbsolutePath(ceed, interp_kernel_name, &interp_kernel_path));
  }
  CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
  CeedCallBackend(CeedLoadSourceAndInitializeBuffer(ceed, interp_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source));
  {
    char   *grad_kernel_name_base = "ceed/jit-source/magma/magma-basis-grad";
    CeedInt grad_kernel_name_len  = strlen(grad_kernel_name_base) + 6;
    char    grad_kernel_name[grad_kernel_name_len];

    snprintf(grad_kernel_name, grad_kernel_name_len, "%s-%" CeedInt_FMT "d.h", grad_kernel_name_base, dim);
    CeedCallBackend(CeedGetJitAbsolutePath(ceed, grad_kernel_name, &grad_kernel_path));
  }
  CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, grad_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source));
  {
    char   *weight_kernel_name_base = "ceed/jit-source/magma/magma-basis-weight";
    CeedInt weight_kernel_name_len  = strlen(weight_kernel_name_base) + 6;
    char    weight_kernel_name[weight_kernel_name_len];

    snprintf(weight_kernel_name, weight_kernel_name_len, "%s-%" CeedInt_FMT "d.h", weight_kernel_name_base, dim);
    CeedCallBackend(CeedGetJitAbsolutePath(ceed, weight_kernel_name, &weight_kernel_path));
  }
  CeedCallBackend(CeedLoadSourceToInitializedBuffer(ceed, weight_kernel_path, &num_file_paths, &file_paths, &basis_kernel_source));
  CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
  CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module, 5, "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, "BASIS_P",
                                   P_1d, "BASIS_Q", Q_1d, "BASIS_MAX_P_Q", CeedIntMax(P_1d, Q_1d)));
  // Look up the dimension-specific kernel entry points
  switch (dim) {
    case 1:
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_1d_kernel", &impl->Interp));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_1d_kernel", &impl->InterpTranspose));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_1d_kernel", &impl->Grad));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_1d_kernel", &impl->GradTranspose));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_1d_kernel", &impl->Weight));
      break;
    case 2:
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_2d_kernel", &impl->Interp));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_2d_kernel", &impl->InterpTranspose));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_2d_kernel", &impl->Grad));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_2d_kernel", &impl->GradTranspose));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_2d_kernel", &impl->Weight));
      break;
    case 3:
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpn_3d_kernel", &impl->Interp));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_interpt_3d_kernel", &impl->InterpTranspose));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradn_3d_kernel", &impl->Grad));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_gradt_3d_kernel", &impl->GradTranspose));
      CeedCallBackend(CeedGetKernelMagma(ceed, impl->module, "magma_weight_3d_kernel", &impl->Weight));
      break;
  }
  CeedCallBackend(CeedFree(&interp_kernel_path));
  CeedCallBackend(CeedFree(&grad_kernel_path));
  CeedCallBackend(CeedFree(&weight_kernel_path));
  CeedCallBackend(CeedFree(&basis_kernel_source));
  for (CeedInt i = 0; i < num_file_paths; i++) CeedCall(CeedFree(&file_paths[i]));
  CeedCall(CeedFree(&file_paths));

  CeedCallBackend(CeedBasisSetData(basis, impl));

  // Register backend functions
  CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Magma));
  CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Magma));
  CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Magma));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Create non-tensor H^1
//
// Uploads basis data to the device; interp/deriv kernel compilation is
// deferred to the first CeedBasisApply call (see CeedBasisApplyNonTensor_Magma).
//------------------------------------------------------------------------------
int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
                            const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
  Ceed                      ceed;
  Ceed_Magma               *data;
  CeedBasisNonTensor_Magma *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
CeedCallBackend(CeedGetData(ceed, &data));
  CeedCallBackend(CeedCalloc(1, &impl));

  // Copy basis data to GPU (each array is optional)
  if (q_weight) {
    CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
    magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
  }
  if (interp) {
    CeedInt q_comp_interp;

    CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
    CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
    magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
  }
  if (grad) {
    CeedInt q_comp_grad;

    CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
    CeedCallBackend(magma_malloc((void **)&impl->d_grad, num_qpts * num_nodes * q_comp_grad * sizeof(grad[0])));
    magma_setvector(num_qpts * num_nodes * q_comp_grad, sizeof(grad[0]), grad, 1, impl->d_grad, 1, data->queue);
  }

  // Compile the weight kernel if it won't be compiled later on
  // (the lazy path in CeedBasisApplyNonTensor_Magma only runs for small P/Q)
  if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
    Ceed        ceed_delegate;
    char       *basis_kernel_source;
    const char *weight_kernel_path;

    // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
    CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));

    // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
    CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
    CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
    CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
    CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
    CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts));
    CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight));
    CeedCallBackend(CeedFree(&weight_kernel_path));
    CeedCallBackend(CeedFree(&basis_kernel_source));
  }

  CeedCallBackend(CeedBasisSetData(basis, impl));

  // Register backend functions
  CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
  CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Create non-tensor H(div)
//
// Same structure as CeedBasisCreateH1_Magma, with the derivative matrix
// being the divergence operator.
//------------------------------------------------------------------------------
int CeedBasisCreateHdiv_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
                              const CeedScalar *div, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
  Ceed                      ceed;
  Ceed_Magma               *data;
  CeedBasisNonTensor_Magma *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
  CeedCallBackend(CeedGetData(ceed, &data));
  CeedCallBackend(CeedCalloc(1, &impl));

  // Copy basis data to GPU (each array is optional)
  if (q_weight) {
    CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
    magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
  }
  if (interp) {
    CeedInt q_comp_interp;

    CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
    CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
    magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
  }
  if (div) {
    CeedInt q_comp_div;

    CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div));
    CeedCallBackend(magma_malloc((void **)&impl->d_div, num_qpts * num_nodes * q_comp_div * sizeof(div[0])));
    magma_setvector(num_qpts * num_nodes * q_comp_div, sizeof(div[0]), div, 1, impl->d_div, 1, data->queue);
  }

  // Compile the weight kernel if it won't be compiled later on
  if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) {
    Ceed        ceed_delegate;
    char       *basis_kernel_source;
    const char *weight_kernel_path;

    // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data
    CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate));

    // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply)
    CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path));
    CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n");
    CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source));
    CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n");
    CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts));
    CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight));
    CeedCallBackend(CeedFree(&weight_kernel_path));
    CeedCallBackend(CeedFree(&basis_kernel_source));
  }

  CeedCallBackend(CeedBasisSetData(basis, impl));

  // Register backend functions
  CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma));
  CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma));
  return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Create non-tensor H(curl)
//------------------------------------------------------------------------------
int CeedBasisCreateHcurl_Magma(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
                               const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
  Ceed                      ceed;
  Ceed_Magma               *data;
  CeedBasisNonTensor_Magma *impl;

  CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
  CeedCallBackend(CeedGetData(ceed, &data));
  CeedCallBackend(CeedCalloc(1, &impl));

  // Copy basis data to GPU (each array is optional)
  if (q_weight) {
    CeedCallBackend(magma_malloc((void **)&impl->d_q_weight, num_qpts * sizeof(q_weight[0])));
    magma_setvector(num_qpts, sizeof(q_weight[0]), q_weight, 1, impl->d_q_weight, 1, data->queue);
  }
  if (interp) {
    CeedInt q_comp_interp;

    CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
    CeedCallBackend(magma_malloc((void **)&impl->d_interp, num_qpts * num_nodes * q_comp_interp * sizeof(interp[0])));
    magma_setvector(num_qpts * num_nodes * q_comp_interp, sizeof(interp[0]), interp, 1, impl->d_interp, 1, data->queue);
} 743 if (curl) { 744 CeedInt q_comp_curl; 745 746 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl)); 747 CeedCallBackend(magma_malloc((void **)&impl->d_curl, num_qpts * num_nodes * q_comp_curl * sizeof(curl[0]))); 748 magma_setvector(num_qpts * num_nodes * q_comp_curl, sizeof(curl[0]), curl, 1, impl->d_curl, 1, data->queue); 749 } 750 751 // Compile the weight kernel if it won't be compiled later on 752 if (num_nodes > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_P || num_qpts > MAGMA_NONTENSOR_CUSTOM_KERNEL_MAX_Q) { 753 Ceed ceed_delegate; 754 char *basis_kernel_source; 755 const char *weight_kernel_path; 756 757 // The RTC compilation code expects a Ceed with the common Ceed_Cuda or Ceed_Hip data 758 CeedCallBackend(CeedGetDelegate(ceed, &ceed_delegate)); 759 760 // Compile weight kernel (the remainder of kernel compilation happens at first call to CeedBasisApply) 761 CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/magma/magma-basis-weight-nontensor.h", &weight_kernel_path)); 762 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 763 CeedCallBackend(CeedLoadSourceToBuffer(ceed, weight_kernel_path, &basis_kernel_source)); 764 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! 
-----\n"); 765 CeedCallBackend(CeedCompileMagma(ceed_delegate, basis_kernel_source, &impl->module[0], 1, "BASIS_Q", num_qpts)); 766 CeedCallBackend(CeedGetKernelMagma(ceed, impl->module[0], "magma_weight_nontensor", &impl->Weight)); 767 CeedCallBackend(CeedFree(&weight_kernel_path)); 768 CeedCallBackend(CeedFree(&basis_kernel_source)); 769 } 770 771 CeedCallBackend(CeedBasisSetData(basis, impl)); 772 773 // Register backend functions 774 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Magma)); 775 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Magma)); 776 return CEED_ERROR_SUCCESS; 777 } 778 779 //------------------------------------------------------------------------------ 780