// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 #include <ceed/ceed.h> 18 #include <ceed/backend.h> 19 #include "ceed-magma.h" 20 21 #ifdef __cplusplus 22 CEED_INTERN "C" 23 #endif 24 int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 25 CeedTransposeMode tmode, CeedEvalMode emode, 26 CeedVector U, CeedVector V) { 27 int ierr; 28 Ceed ceed; 29 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 30 CeedInt dim, ncomp, ndof; 31 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 32 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 33 ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 34 35 Ceed_Magma *data; 36 ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 37 38 const CeedScalar *u; 39 CeedScalar *v; 40 if (emode != CEED_EVAL_WEIGHT) { 41 ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChkBackend(ierr); 42 } else if (emode != CEED_EVAL_WEIGHT) { 43 // LCOV_EXCL_START 44 return CeedError(ceed, CEED_ERROR_BACKEND, 45 "An input vector is required for this CeedEvalMode"); 46 // LCOV_EXCL_STOP 47 } 48 ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr); 49 50 CeedBasis_Magma *impl; 51 ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 52 53 CeedInt P1d, Q1d; 54 ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr); 55 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr); 56 57 CeedDebug(ceed, "\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", 58 ncomp*CeedIntPow(P1d, dim), ncomp); 59 60 if (tmode == CEED_TRANSPOSE) { 61 CeedInt length; 62 ierr = CeedVectorGetLength(V, &length); CeedChkBackend(ierr); 63 if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 64 magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) v, length, 65 data->queue); 66 } else { 67 magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) v, length, 68 data->queue); 69 } 70 ceed_magma_queue_sync( data->queue ); 71 } 72 switch (emode) { 73 case CEED_EVAL_INTERP: { 74 CeedInt P = P1d, Q = Q1d; 75 if 
(tmode == CEED_TRANSPOSE) { 76 P = Q1d; Q = P1d; 77 } 78 79 // Define element sizes for dofs/quad 80 CeedInt elquadsize = CeedIntPow(Q1d, dim); 81 CeedInt eldofssize = CeedIntPow(P1d, dim); 82 83 // E-vector ordering -------------- Q-vector ordering 84 // component component 85 // elem elem 86 // node node 87 88 // --- Define strides for NOTRANSPOSE mode: --- 89 // Input (u) is E-vector, output (v) is Q-vector 90 91 // Element strides 92 CeedInt u_elstride = eldofssize; 93 CeedInt v_elstride = elquadsize; 94 // Component strides 95 CeedInt u_compstride = nelem * eldofssize; 96 CeedInt v_compstride = nelem * elquadsize; 97 98 // --- Swap strides for TRANSPOSE mode: --- 99 if (tmode == CEED_TRANSPOSE) { 100 // Input (u) is Q-vector, output (v) is E-vector 101 // Element strides 102 v_elstride = eldofssize; 103 u_elstride = elquadsize; 104 // Component strides 105 v_compstride = nelem * eldofssize; 106 u_compstride = nelem * elquadsize; 107 } 108 109 ierr = magma_interp(P, Q, dim, ncomp, 110 impl->dinterp1d, tmode, 111 u, u_elstride, u_compstride, 112 v, v_elstride, v_compstride, 113 nelem, data->basis_kernel_mode, data->maxthreads, 114 data->queue); 115 if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 116 "MAGMA: launch failure detected for magma_interp"); 117 } 118 break; 119 case CEED_EVAL_GRAD: { 120 CeedInt P = P1d, Q = Q1d; 121 // In CEED_NOTRANSPOSE mode: 122 // u is (P^dim x nc), column-major layout (nc = ncomp) 123 // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 124 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 
125 if (tmode == CEED_TRANSPOSE) { 126 P = Q1d, Q = P1d; 127 } 128 129 // Define element sizes for dofs/quad 130 CeedInt elquadsize = CeedIntPow(Q1d, dim); 131 CeedInt eldofssize = CeedIntPow(P1d, dim); 132 133 // E-vector ordering -------------- Q-vector ordering 134 // dim 135 // component component 136 // elem elem 137 // node node 138 139 // --- Define strides for NOTRANSPOSE mode: --- 140 // Input (u) is E-vector, output (v) is Q-vector 141 142 // Element strides 143 CeedInt u_elstride = eldofssize; 144 CeedInt v_elstride = elquadsize; 145 // Component strides 146 CeedInt u_compstride = nelem * eldofssize; 147 CeedInt v_compstride = nelem * elquadsize; 148 // Dimension strides 149 CeedInt u_dimstride = 0; 150 CeedInt v_dimstride = nelem * elquadsize * ncomp; 151 152 // --- Swap strides for TRANSPOSE mode: --- 153 if (tmode == CEED_TRANSPOSE) { 154 // Input (u) is Q-vector, output (v) is E-vector 155 // Element strides 156 v_elstride = eldofssize; 157 u_elstride = elquadsize; 158 // Component strides 159 v_compstride = nelem * eldofssize; 160 u_compstride = nelem * elquadsize; 161 // Dimension strides 162 v_dimstride = 0; 163 u_dimstride = nelem * elquadsize * ncomp; 164 165 } 166 167 ierr = magma_grad( P, Q, dim, ncomp, 168 impl->dinterp1d, impl->dgrad1d, tmode, 169 u, u_elstride, u_compstride, u_dimstride, 170 v, v_elstride, v_compstride, v_dimstride, 171 nelem, data->basis_kernel_mode, data->maxthreads, 172 data->queue); 173 if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 174 "MAGMA: launch failure detected for magma_grad"); 175 } 176 break; 177 case CEED_EVAL_WEIGHT: { 178 if (tmode == CEED_TRANSPOSE) 179 // LCOV_EXCL_START 180 return CeedError(ceed, CEED_ERROR_BACKEND, 181 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 182 // LCOV_EXCL_STOP 183 CeedInt Q = Q1d; 184 int eldofssize = CeedIntPow(Q, dim); 185 ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem, 186 data->basis_kernel_mode, data->maxthreads, data->queue); 187 if 
(ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 188 "MAGMA: launch failure detected for magma_weight"); 189 } 190 break; 191 // LCOV_EXCL_START 192 case CEED_EVAL_DIV: 193 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 194 case CEED_EVAL_CURL: 195 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 196 case CEED_EVAL_NONE: 197 return CeedError(ceed, CEED_ERROR_BACKEND, 198 "CEED_EVAL_NONE does not make sense in this context"); 199 // LCOV_EXCL_STOP 200 } 201 202 // must sync to ensure completeness 203 ceed_magma_queue_sync( data->queue ); 204 205 if (emode!=CEED_EVAL_WEIGHT) { 206 ierr = CeedVectorRestoreArrayRead(U, &u); CeedChkBackend(ierr); 207 } 208 ierr = CeedVectorRestoreArray(V, &v); CeedChkBackend(ierr); 209 return CEED_ERROR_SUCCESS; 210 } 211 212 #ifdef __cplusplus 213 CEED_INTERN "C" 214 #endif 215 int CeedBasisApplyNonTensor_f64_Magma(CeedBasis basis, CeedInt nelem, 216 CeedTransposeMode tmode, CeedEvalMode emode, 217 CeedVector U, CeedVector V) { 218 int ierr; 219 Ceed ceed; 220 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 221 222 Ceed_Magma *data; 223 ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 224 225 CeedInt dim, ncomp, ndof, nqpt; 226 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 227 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 228 ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 229 ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr); 230 const CeedScalar *du; 231 CeedScalar *dv; 232 if (emode != CEED_EVAL_WEIGHT) { 233 ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr); 234 } else if (emode != CEED_EVAL_WEIGHT) { 235 // LCOV_EXCL_START 236 return CeedError(ceed, CEED_ERROR_BACKEND, 237 "An input vector is required for this CeedEvalMode"); 238 // LCOV_EXCL_STOP 239 } 240 ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); 241 242 
CeedBasisNonTensor_Magma *impl; 243 ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 244 245 CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 246 ncomp*ndof, ncomp); 247 248 if (tmode == CEED_TRANSPOSE) { 249 CeedInt length; 250 ierr = CeedVectorGetLength(V, &length); 251 if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 252 magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length, 253 data->queue); 254 } else { 255 magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length, 256 data->queue); 257 } 258 ceed_magma_queue_sync( data->queue ); 259 } 260 261 switch (emode) { 262 case CEED_EVAL_INTERP: { 263 CeedInt P = ndof, Q = nqpt; 264 if (tmode == CEED_TRANSPOSE) 265 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 266 P, nelem*ncomp, Q, 267 1.0, (double *)impl->dinterp, P, 268 (double *)du, Q, 269 0.0, (double *)dv, P, data->queue); 270 else 271 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 272 Q, nelem*ncomp, P, 273 1.0, (double *)impl->dinterp, P, 274 (double *)du, P, 275 0.0, (double *)dv, Q, data->queue); 276 } 277 break; 278 279 case CEED_EVAL_GRAD: { 280 CeedInt P = ndof, Q = nqpt; 281 if (tmode == CEED_TRANSPOSE) { 282 CeedScalar beta = 0.0; 283 for(int d=0; d<dim; d++) { 284 if (d>0) 285 beta = 1.0; 286 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 287 P, nelem*ncomp, Q, 288 1.0, (double *)(impl->dgrad + d*P*Q), P, 289 (double *)(du + d*nelem*ncomp*Q), Q, 290 beta, (double *)dv, P, data->queue); 291 } 292 } else { 293 for(int d=0; d< dim; d++) 294 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 295 Q, nelem*ncomp, P, 296 1.0, (double *)(impl->dgrad + d*P*Q), P, 297 (double *)du, P, 298 0.0, (double *)(dv + d*nelem*ncomp*Q), Q, data->queue); 299 } 300 } 301 break; 302 303 case CEED_EVAL_WEIGHT: { 304 if (tmode == CEED_TRANSPOSE) 305 // LCOV_EXCL_START 306 return CeedError(ceed, CEED_ERROR_BACKEND, 307 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 308 // LCOV_EXCL_STOP 309 310 int 
elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 311 int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 312 1 : 0 ); 313 magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 314 data->queue); 315 CeedChkBackend(ierr); 316 } 317 break; 318 319 // LCOV_EXCL_START 320 case CEED_EVAL_DIV: 321 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 322 case CEED_EVAL_CURL: 323 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 324 case CEED_EVAL_NONE: 325 return CeedError(ceed, CEED_ERROR_BACKEND, 326 "CEED_EVAL_NONE does not make sense in this context"); 327 // LCOV_EXCL_STOP 328 } 329 330 // must sync to ensure completeness 331 ceed_magma_queue_sync( data->queue ); 332 333 if (emode!=CEED_EVAL_WEIGHT) { 334 ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr); 335 } 336 ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr); 337 return CEED_ERROR_SUCCESS; 338 } 339 340 int CeedBasisApplyNonTensor_f32_Magma(CeedBasis basis, CeedInt nelem, 341 CeedTransposeMode tmode, CeedEvalMode emode, 342 CeedVector U, CeedVector V) { 343 int ierr; 344 Ceed ceed; 345 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 346 347 Ceed_Magma *data; 348 ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 349 350 CeedInt dim, ncomp, ndof, nqpt; 351 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 352 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 353 ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 354 ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr); 355 const CeedScalar *du; 356 CeedScalar *dv; 357 if (emode != CEED_EVAL_WEIGHT) { 358 ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr); 359 } else if (emode != CEED_EVAL_WEIGHT) { 360 // LCOV_EXCL_START 361 return CeedError(ceed, CEED_ERROR_BACKEND, 362 "An input vector is required for this CeedEvalMode"); 
363 // LCOV_EXCL_STOP 364 } 365 ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); 366 367 CeedBasisNonTensor_Magma *impl; 368 ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 369 370 CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 371 ncomp*ndof, ncomp); 372 373 if (tmode == CEED_TRANSPOSE) { 374 CeedInt length; 375 ierr = CeedVectorGetLength(V, &length); 376 if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 377 magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length, 378 data->queue); 379 } else { 380 magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length, 381 data->queue); 382 } 383 ceed_magma_queue_sync( data->queue ); 384 } 385 386 switch (emode) { 387 case CEED_EVAL_INTERP: { 388 CeedInt P = ndof, Q = nqpt; 389 if (tmode == CEED_TRANSPOSE) 390 magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 391 P, nelem*ncomp, Q, 392 1.0, (float *)impl->dinterp, P, 393 (float *)du, Q, 394 0.0, (float *)dv, P, data->queue); 395 else 396 magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans, 397 Q, nelem*ncomp, P, 398 1.0, (float *)impl->dinterp, P, 399 (float *)du, P, 400 0.0, (float *)dv, Q, data->queue); 401 } 402 break; 403 404 case CEED_EVAL_GRAD: { 405 CeedInt P = ndof, Q = nqpt; 406 if (tmode == CEED_TRANSPOSE) { 407 CeedScalar beta = 0.0; 408 for(int d=0; d<dim; d++) { 409 if (d>0) 410 beta = 1.0; 411 magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 412 P, nelem*ncomp, Q, 413 1.0, (float *)(impl->dgrad + d*P*Q), P, 414 (float *)(du + d*nelem*ncomp*Q), Q, 415 beta, (float *)dv, P, data->queue); 416 } 417 } else { 418 for(int d=0; d< dim; d++) 419 magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans, 420 Q, nelem*ncomp, P, 421 1.0, (float *)(impl->dgrad + d*P*Q), P, 422 (float *)du, P, 423 0.0, (float *)(dv + d*nelem*ncomp*Q), Q, data->queue); 424 } 425 } 426 break; 427 428 case CEED_EVAL_WEIGHT: { 429 if (tmode == CEED_TRANSPOSE) 430 // LCOV_EXCL_START 431 return CeedError(ceed, 
CEED_ERROR_BACKEND, 432 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 433 // LCOV_EXCL_STOP 434 435 int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 436 int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 437 1 : 0 ); 438 magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 439 data->queue); 440 CeedChkBackend(ierr); 441 } 442 break; 443 444 // LCOV_EXCL_START 445 case CEED_EVAL_DIV: 446 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 447 case CEED_EVAL_CURL: 448 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 449 case CEED_EVAL_NONE: 450 return CeedError(ceed, CEED_ERROR_BACKEND, 451 "CEED_EVAL_NONE does not make sense in this context"); 452 // LCOV_EXCL_STOP 453 } 454 455 // must sync to ensure completeness 456 ceed_magma_queue_sync( data->queue ); 457 458 if (emode!=CEED_EVAL_WEIGHT) { 459 ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr); 460 } 461 ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr); 462 return CEED_ERROR_SUCCESS; 463 } 464 465 #ifdef __cplusplus 466 CEED_INTERN "C" 467 #endif 468 int CeedBasisDestroy_Magma(CeedBasis basis) { 469 int ierr; 470 CeedBasis_Magma *impl; 471 ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 472 473 ierr = magma_free(impl->dqref1d); CeedChkBackend(ierr); 474 ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr); 475 ierr = magma_free(impl->dgrad1d); CeedChkBackend(ierr); 476 ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr); 477 478 ierr = CeedFree(&impl); CeedChkBackend(ierr); 479 480 return CEED_ERROR_SUCCESS; 481 } 482 483 #ifdef __cplusplus 484 CEED_INTERN "C" 485 #endif 486 int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) { 487 int ierr; 488 CeedBasisNonTensor_Magma *impl; 489 ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 490 491 ierr = magma_free(impl->dqref); CeedChkBackend(ierr); 492 ierr = magma_free(impl->dinterp); 
CeedChkBackend(ierr); 493 ierr = magma_free(impl->dgrad); CeedChkBackend(ierr); 494 ierr = magma_free(impl->dqweight); CeedChkBackend(ierr); 495 496 ierr = CeedFree(&impl); CeedChkBackend(ierr); 497 498 return CEED_ERROR_SUCCESS; 499 } 500 501 #ifdef __cplusplus 502 CEED_INTERN "C" 503 #endif 504 int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, 505 const CeedScalar *interp1d, 506 const CeedScalar *grad1d, 507 const CeedScalar *qref1d, 508 const CeedScalar *qweight1d, CeedBasis basis) { 509 int ierr; 510 CeedBasis_Magma *impl; 511 Ceed ceed; 512 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 513 514 // Check for supported parameters 515 CeedInt ncomp = 0; 516 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 517 if (ncomp > 3) 518 // LCOV_EXCL_START 519 return CeedError(ceed, CEED_ERROR_BACKEND, 520 "Magma backend does not support tensor bases with more than 3 components"); 521 // LCOV_EXCL_STOP 522 if (P1d > 10) 523 // LCOV_EXCL_START 524 return CeedError(ceed, CEED_ERROR_BACKEND, 525 "Magma backend does not support tensor bases with more than 10 nodes in each dimension"); 526 // LCOV_EXCL_STOP 527 if (Q1d > 10) 528 // LCOV_EXCL_START 529 return CeedError(ceed, CEED_ERROR_BACKEND, 530 "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension"); 531 // LCOV_EXCL_STOP 532 533 Ceed_Magma *data; 534 ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 535 536 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 537 CeedBasisApply_Magma); CeedChkBackend(ierr); 538 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 539 CeedBasisDestroy_Magma); CeedChkBackend(ierr); 540 541 ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 542 ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 543 544 // Copy qref1d to the GPU 545 ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); 546 CeedChkBackend(ierr); 547 magma_setvector(Q1d, 
sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1, 548 data->queue); 549 550 // Copy interp1d to the GPU 551 ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0])); 552 CeedChkBackend(ierr); 553 magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1, 554 data->queue); 555 556 // Copy grad1d to the GPU 557 ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0])); 558 CeedChkBackend(ierr); 559 magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1, 560 data->queue); 561 562 // Copy qweight1d to the GPU 563 ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0])); 564 CeedChkBackend(ierr); 565 magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1, 566 data->queue); 567 568 return CEED_ERROR_SUCCESS; 569 } 570 571 #ifdef __cplusplus 572 CEED_INTERN "C" 573 #endif 574 int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, 575 CeedInt nqpts, const CeedScalar *interp, 576 const CeedScalar *grad, const CeedScalar *qref, 577 const CeedScalar *qweight, CeedBasis basis) { 578 int ierr; 579 CeedBasisNonTensor_Magma *impl; 580 Ceed ceed; 581 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 582 583 Ceed_Magma *data; 584 ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 585 586 if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) { 587 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 588 CeedBasisApplyNonTensor_f64_Magma); 589 CeedChkBackend(ierr); 590 } else { 591 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 592 CeedBasisApplyNonTensor_f32_Magma); 593 CeedChkBackend(ierr); 594 } 595 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 596 CeedBasisDestroyNonTensor_Magma); CeedChkBackend(ierr); 597 598 ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 599 ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 600 601 // Copy qref to the GPU 602 ierr = magma_malloc((void **)&impl->dqref, 
nqpts*sizeof(qref[0])); 603 CeedChkBackend(ierr); 604 magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue); 605 606 // Copy interp to the GPU 607 ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0])); 608 CeedChkBackend(ierr); 609 magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1, 610 data->queue); 611 612 // Copy grad to the GPU 613 ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0])); 614 CeedChkBackend(ierr); 615 magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1, 616 data->queue); 617 618 // Copy qweight to the GPU 619 ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0])); 620 CeedChkBackend(ierr); 621 magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1, 622 data->queue); 623 624 return CEED_ERROR_SUCCESS; 625 } 626