17f5b9731SStan Tomov // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 27f5b9731SStan Tomov // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 37f5b9731SStan Tomov // All Rights reserved. See files LICENSE and NOTICE for details. 47f5b9731SStan Tomov // 57f5b9731SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software 67f5b9731SStan Tomov // libraries and APIs for efficient high-order finite element and spectral 77f5b9731SStan Tomov // element discretizations for exascale applications. For more information and 87f5b9731SStan Tomov // source code availability see http://github.com/ceed. 97f5b9731SStan Tomov // 107f5b9731SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 117f5b9731SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office 127f5b9731SStan Tomov // of Science and the National Nuclear Security Administration) responsible for 137f5b9731SStan Tomov // the planning and preparation of a capable exascale ecosystem, including 147f5b9731SStan Tomov // software, applications, hardware, advanced system engineering and early 157f5b9731SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative. 167f5b9731SStan Tomov 17ec3da8bcSJed Brown #include <ceed/ceed.h> 18ec3da8bcSJed Brown #include <ceed/backend.h> 197f5b9731SStan Tomov #include "ceed-magma.h" 207f5b9731SStan Tomov 217f5b9731SStan Tomov #ifdef __cplusplus 227f5b9731SStan Tomov CEED_INTERN "C" 237f5b9731SStan Tomov #endif 247f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 257f5b9731SStan Tomov CeedTransposeMode tmode, CeedEvalMode emode, 263513a710Sjeremylt CeedVector U, CeedVector V) { 277f5b9731SStan Tomov int ierr; 287f5b9731SStan Tomov Ceed ceed; 29e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 30e0582403Sabdelfattah83 CeedInt dim, ncomp, ndof; 31e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 32e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 33e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 34e0582403Sabdelfattah83 35e0582403Sabdelfattah83 Ceed_Magma *data; 36e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 37e0582403Sabdelfattah83 387f5b9731SStan Tomov const CeedScalar *u; 397f5b9731SStan Tomov CeedScalar *v; 40868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 41e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChkBackend(ierr); 427f5b9731SStan Tomov } else if (emode != CEED_EVAL_WEIGHT) { 437f5b9731SStan Tomov // LCOV_EXCL_START 44e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 457f5b9731SStan Tomov "An input vector is required for this CeedEvalMode"); 467f5b9731SStan Tomov // LCOV_EXCL_STOP 477f5b9731SStan Tomov } 48e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr); 497f5b9731SStan Tomov 507f5b9731SStan Tomov CeedBasis_Magma *impl; 51e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 527f5b9731SStan Tomov 537f5b9731SStan Tomov CeedInt P1d, Q1d; 54e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr); 55e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr); 567f5b9731SStan Tomov 57*3f21f6b1SJeremy L Thompson CeedDebug(ceed, "\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", 587f5b9731SStan Tomov ncomp*CeedIntPow(P1d, dim), ncomp); 597f5b9731SStan Tomov 607f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 617f5b9731SStan Tomov CeedInt length; 62e15f9bd0SJeremy L Thompson ierr = CeedVectorGetLength(V, &length); CeedChkBackend(ierr); 6380a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 6480a9ef05SNatalie Beams magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) v, length, 6580a9ef05SNatalie Beams data->queue); 6680a9ef05SNatalie Beams } else { 6780a9ef05SNatalie Beams magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) v, length, 6880a9ef05SNatalie Beams data->queue); 6980a9ef05SNatalie Beams } 70e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 717f5b9731SStan Tomov } 723513a710Sjeremylt switch (emode) { 733513a710Sjeremylt case CEED_EVAL_INTERP: { 747f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 757f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 767f5b9731SStan Tomov P = Q1d; Q = P1d; 777f5b9731SStan Tomov } 787f5b9731SStan Tomov 797f5b9731SStan Tomov // Define element sizes for dofs/quad 807f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 817f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 827f5b9731SStan Tomov 837f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 84868539c2SNatalie Beams // component component 85868539c2SNatalie Beams // elem elem 867f5b9731SStan Tomov // node node 877f5b9731SStan Tomov 887f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 897f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 907f5b9731SStan Tomov 917f5b9731SStan Tomov // Element strides 92868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 937f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 947f5b9731SStan Tomov // Component strides 95868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 967f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 977f5b9731SStan Tomov 987f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 997f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1007f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 1017f5b9731SStan Tomov // Element strides 102868539c2SNatalie Beams v_elstride = eldofssize; 1037f5b9731SStan Tomov u_elstride = elquadsize; 1047f5b9731SStan Tomov // Component strides 105868539c2SNatalie Beams v_compstride = nelem * eldofssize; 1067f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1077f5b9731SStan Tomov } 1087f5b9731SStan Tomov 109e0582403Sabdelfattah83 ierr = magma_interp(P, Q, dim, ncomp, 1107f5b9731SStan Tomov impl->dinterp1d, tmode, 111868539c2SNatalie Beams u, u_elstride, u_compstride, 112868539c2SNatalie Beams v, v_elstride, v_compstride, 113e0582403Sabdelfattah83 nelem, data->basis_kernel_mode, data->maxthreads, 114e0582403Sabdelfattah83 data->queue); 11542e449dbSjeremylt if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 116e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_interp"); 1177f5b9731SStan Tomov } 1183513a710Sjeremylt break; 1193513a710Sjeremylt case CEED_EVAL_GRAD: { 1207f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 1217f5b9731SStan Tomov // In CEED_NOTRANSPOSE mode: 1227f5b9731SStan Tomov // u is (P^dim x nc), column-major layout (nc = ncomp) 1237f5b9731SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 1247f5b9731SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 1257f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1267f5b9731SStan Tomov P = Q1d, Q = P1d; 1277f5b9731SStan Tomov } 1287f5b9731SStan Tomov 1297f5b9731SStan Tomov // Define element sizes for dofs/quad 1307f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 1317f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 1327f5b9731SStan Tomov 1337f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 1347f5b9731SStan Tomov // dim 135868539c2SNatalie Beams // component component 136868539c2SNatalie Beams // elem elem 1377f5b9731SStan Tomov // node node 1387f5b9731SStan Tomov 1397f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 1407f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 1417f5b9731SStan Tomov 1427f5b9731SStan Tomov // Element strides 143868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 1447f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 1457f5b9731SStan Tomov // Component strides 146868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 1477f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 1487f5b9731SStan Tomov // Dimension strides 1497f5b9731SStan Tomov CeedInt u_dimstride = 0; 1507f5b9731SStan Tomov CeedInt v_dimstride = nelem * elquadsize * ncomp; 1517f5b9731SStan Tomov 1527f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 1537f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1547f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 1557f5b9731SStan Tomov // Element strides 156868539c2SNatalie Beams v_elstride = eldofssize; 1577f5b9731SStan Tomov u_elstride = elquadsize; 1587f5b9731SStan Tomov // Component strides 159868539c2SNatalie Beams v_compstride = nelem * eldofssize; 1607f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1617f5b9731SStan Tomov // Dimension strides 1627f5b9731SStan Tomov v_dimstride = 0; 1637f5b9731SStan Tomov u_dimstride = nelem * elquadsize * ncomp; 1647f5b9731SStan Tomov 1657f5b9731SStan Tomov } 1667f5b9731SStan Tomov 167e0582403Sabdelfattah83 ierr = magma_grad( P, Q, dim, ncomp, 1687f5b9731SStan Tomov impl->dinterp1d, impl->dgrad1d, tmode, 169e0582403Sabdelfattah83 u, u_elstride, u_compstride, u_dimstride, 170e0582403Sabdelfattah83 v, v_elstride, v_compstride, v_dimstride, 171e0582403Sabdelfattah83 nelem, data->basis_kernel_mode, data->maxthreads, 172e0582403Sabdelfattah83 data->queue); 17342e449dbSjeremylt if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 174e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_grad"); 1757f5b9731SStan Tomov } 1763513a710Sjeremylt break; 1773513a710Sjeremylt case CEED_EVAL_WEIGHT: { 1787f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) 1797f5b9731SStan Tomov // LCOV_EXCL_START 180e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1817f5b9731SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 1827f5b9731SStan Tomov // LCOV_EXCL_STOP 1837f5b9731SStan Tomov CeedInt Q = Q1d; 1847f5b9731SStan Tomov int eldofssize = CeedIntPow(Q, dim); 185e0582403Sabdelfattah83 ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem, 186e0582403Sabdelfattah83 data->basis_kernel_mode, data->maxthreads, data->queue); 18742e449dbSjeremylt if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 188e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_weight"); 1897f5b9731SStan Tomov } 1903513a710Sjeremylt break; 1913513a710Sjeremylt // LCOV_EXCL_START 1923513a710Sjeremylt case CEED_EVAL_DIV: 193e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 1943513a710Sjeremylt case CEED_EVAL_CURL: 195e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 1963513a710Sjeremylt case CEED_EVAL_NONE: 197e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1983513a710Sjeremylt "CEED_EVAL_NONE does not make sense in this context"); 1993513a710Sjeremylt // LCOV_EXCL_STOP 2003513a710Sjeremylt } 2017f5b9731SStan Tomov 202e0582403Sabdelfattah83 // must sync to ensure completeness 203e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 204e0582403Sabdelfattah83 2057f5b9731SStan Tomov if (emode!=CEED_EVAL_WEIGHT) { 206e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U, &u); CeedChkBackend(ierr); 2077f5b9731SStan Tomov } 208e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(V, &v); CeedChkBackend(ierr); 209e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2107f5b9731SStan Tomov } 2117f5b9731SStan Tomov 2127f5b9731SStan Tomov #ifdef __cplusplus 2137f5b9731SStan Tomov CEED_INTERN "C" 2147f5b9731SStan Tomov #endif 21580a9ef05SNatalie Beams int CeedBasisApplyNonTensor_f64_Magma(CeedBasis basis, CeedInt nelem, 216868539c2SNatalie Beams CeedTransposeMode tmode, CeedEvalMode emode, 217868539c2SNatalie Beams CeedVector U, CeedVector V) { 218868539c2SNatalie Beams int ierr; 219868539c2SNatalie Beams Ceed ceed; 220e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 221e0582403Sabdelfattah83 222e0582403Sabdelfattah83 Ceed_Magma *data; 223e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 224e0582403Sabdelfattah83 225868539c2SNatalie Beams CeedInt dim, ncomp, ndof, nqpt; 226e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 227e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 228e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 229e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr); 230868539c2SNatalie Beams const CeedScalar *du; 231868539c2SNatalie Beams CeedScalar *dv; 232868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 233e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr); 234868539c2SNatalie Beams } else if (emode != CEED_EVAL_WEIGHT) { 235868539c2SNatalie Beams // LCOV_EXCL_START 236e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 237868539c2SNatalie Beams "An input vector is required for this CeedEvalMode"); 238868539c2SNatalie Beams // LCOV_EXCL_STOP 239868539c2SNatalie Beams } 240e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); 241868539c2SNatalie Beams 242868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 243e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 244868539c2SNatalie Beams 245*3f21f6b1SJeremy L Thompson CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 246868539c2SNatalie Beams ncomp*ndof, ncomp); 247868539c2SNatalie Beams 248868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 249868539c2SNatalie Beams CeedInt length; 250868539c2SNatalie Beams ierr = CeedVectorGetLength(V, &length); 25180a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 25280a9ef05SNatalie Beams magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length, 25380a9ef05SNatalie Beams data->queue); 25480a9ef05SNatalie Beams } else { 25580a9ef05SNatalie Beams magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length, 25680a9ef05SNatalie Beams data->queue); 25780a9ef05SNatalie Beams } 258e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 259868539c2SNatalie Beams } 26080a9ef05SNatalie Beams 261868539c2SNatalie Beams switch (emode) { 262868539c2SNatalie Beams case CEED_EVAL_INTERP: { 263868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 264868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 265e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 266868539c2SNatalie Beams P, nelem*ncomp, Q, 26780a9ef05SNatalie Beams 1.0, (double *)impl->dinterp, P, 26880a9ef05SNatalie Beams (double *)du, Q, 26980a9ef05SNatalie Beams 0.0, (double *)dv, P, data->queue); 270868539c2SNatalie Beams else 271e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 272868539c2SNatalie Beams Q, nelem*ncomp, P, 27380a9ef05SNatalie Beams 1.0, (double *)impl->dinterp, P, 27480a9ef05SNatalie Beams (double *)du, P, 27580a9ef05SNatalie Beams 0.0, (double *)dv, Q, data->queue); 276868539c2SNatalie Beams } 277868539c2SNatalie Beams break; 278868539c2SNatalie Beams 279868539c2SNatalie Beams case CEED_EVAL_GRAD: { 280868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 281868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 28280a9ef05SNatalie Beams CeedScalar beta = 0.0; 283868539c2SNatalie Beams for(int d=0; d<dim; d++) { 284868539c2SNatalie Beams if (d>0) 285868539c2SNatalie Beams beta = 1.0; 286e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 287868539c2SNatalie Beams P, nelem*ncomp, Q, 28880a9ef05SNatalie Beams 1.0, (double *)(impl->dgrad + d*P*Q), P, 28980a9ef05SNatalie Beams (double *)(du + d*nelem*ncomp*Q), Q, 29080a9ef05SNatalie Beams beta, (double *)dv, P, data->queue); 291868539c2SNatalie Beams } 292868539c2SNatalie Beams } else { 293868539c2SNatalie Beams for(int d=0; d< dim; d++) 294e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 295868539c2SNatalie Beams Q, nelem*ncomp, P, 29680a9ef05SNatalie Beams 1.0, (double *)(impl->dgrad + d*P*Q), P, 29780a9ef05SNatalie Beams (double *)du, P, 29880a9ef05SNatalie Beams 0.0, (double *)(dv + d*nelem*ncomp*Q), Q, data->queue); 29980a9ef05SNatalie Beams } 30080a9ef05SNatalie Beams } 30180a9ef05SNatalie Beams break; 30280a9ef05SNatalie Beams 30380a9ef05SNatalie Beams case CEED_EVAL_WEIGHT: { 30480a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) 30580a9ef05SNatalie Beams // LCOV_EXCL_START 30680a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, 30780a9ef05SNatalie Beams "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 30880a9ef05SNatalie Beams // LCOV_EXCL_STOP 30980a9ef05SNatalie Beams 31080a9ef05SNatalie Beams int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 31180a9ef05SNatalie Beams int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 31280a9ef05SNatalie Beams 1 : 0 ); 31380a9ef05SNatalie Beams magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 31480a9ef05SNatalie Beams data->queue); 31580a9ef05SNatalie Beams CeedChkBackend(ierr); 31680a9ef05SNatalie Beams } 31780a9ef05SNatalie Beams break; 31880a9ef05SNatalie Beams 31980a9ef05SNatalie Beams // LCOV_EXCL_START 32080a9ef05SNatalie Beams case CEED_EVAL_DIV: 32180a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 32280a9ef05SNatalie Beams case CEED_EVAL_CURL: 32380a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 32480a9ef05SNatalie Beams case CEED_EVAL_NONE: 32580a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, 32680a9ef05SNatalie Beams "CEED_EVAL_NONE does not make sense in this context"); 32780a9ef05SNatalie Beams // LCOV_EXCL_STOP 32880a9ef05SNatalie Beams } 32980a9ef05SNatalie Beams 33080a9ef05SNatalie Beams // must sync to ensure completeness 33180a9ef05SNatalie Beams ceed_magma_queue_sync( data->queue ); 33280a9ef05SNatalie Beams 33380a9ef05SNatalie Beams if (emode!=CEED_EVAL_WEIGHT) { 33480a9ef05SNatalie Beams ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr); 33580a9ef05SNatalie Beams } 33680a9ef05SNatalie Beams ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr); 33780a9ef05SNatalie Beams return CEED_ERROR_SUCCESS; 33880a9ef05SNatalie Beams } 33980a9ef05SNatalie Beams 34080a9ef05SNatalie Beams int CeedBasisApplyNonTensor_f32_Magma(CeedBasis basis, CeedInt nelem, 34180a9ef05SNatalie Beams CeedTransposeMode tmode, CeedEvalMode emode, 34280a9ef05SNatalie Beams CeedVector U, CeedVector V) { 34380a9ef05SNatalie Beams int ierr; 34480a9ef05SNatalie Beams Ceed ceed; 34580a9ef05SNatalie Beams ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 34680a9ef05SNatalie Beams 34780a9ef05SNatalie Beams Ceed_Magma *data; 34880a9ef05SNatalie Beams ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 34980a9ef05SNatalie Beams 35080a9ef05SNatalie Beams CeedInt dim, ncomp, ndof, nqpt; 35180a9ef05SNatalie Beams ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 35280a9ef05SNatalie Beams ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 35380a9ef05SNatalie Beams ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 35480a9ef05SNatalie Beams ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr); 35580a9ef05SNatalie Beams const CeedScalar *du; 35680a9ef05SNatalie Beams CeedScalar *dv; 35780a9ef05SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 35880a9ef05SNatalie Beams ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr); 35980a9ef05SNatalie Beams } else if (emode != CEED_EVAL_WEIGHT) { 36080a9ef05SNatalie Beams // LCOV_EXCL_START 36180a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, 36280a9ef05SNatalie Beams "An input vector is required for this CeedEvalMode"); 36380a9ef05SNatalie Beams // LCOV_EXCL_STOP 36480a9ef05SNatalie Beams } 36580a9ef05SNatalie Beams ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); 36680a9ef05SNatalie Beams 36780a9ef05SNatalie Beams CeedBasisNonTensor_Magma *impl; 36880a9ef05SNatalie Beams ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 36980a9ef05SNatalie Beams 370*3f21f6b1SJeremy L Thompson CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 37180a9ef05SNatalie Beams ncomp*ndof, ncomp); 37280a9ef05SNatalie Beams 37380a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) { 37480a9ef05SNatalie Beams CeedInt length; 37580a9ef05SNatalie Beams ierr = CeedVectorGetLength(V, &length); 37680a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 37780a9ef05SNatalie Beams magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length, 37880a9ef05SNatalie Beams data->queue); 37980a9ef05SNatalie Beams } else { 38080a9ef05SNatalie Beams magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length, 38180a9ef05SNatalie Beams data->queue); 38280a9ef05SNatalie Beams } 38380a9ef05SNatalie Beams ceed_magma_queue_sync( data->queue ); 38480a9ef05SNatalie Beams } 38580a9ef05SNatalie Beams 38680a9ef05SNatalie Beams switch (emode) { 38780a9ef05SNatalie Beams case CEED_EVAL_INTERP: { 38880a9ef05SNatalie Beams CeedInt P = ndof, Q = nqpt; 38980a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) 39080a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 39180a9ef05SNatalie Beams P, nelem*ncomp, Q, 39280a9ef05SNatalie Beams 1.0, (float *)impl->dinterp, P, 39380a9ef05SNatalie Beams (float *)du, Q, 39480a9ef05SNatalie Beams 0.0, (float *)dv, P, data->queue); 39580a9ef05SNatalie Beams else 39680a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans, 39780a9ef05SNatalie Beams Q, nelem*ncomp, P, 39880a9ef05SNatalie Beams 1.0, (float *)impl->dinterp, P, 39980a9ef05SNatalie Beams (float *)du, P, 40080a9ef05SNatalie Beams 0.0, (float *)dv, Q, data->queue); 40180a9ef05SNatalie Beams } 40280a9ef05SNatalie Beams break; 40380a9ef05SNatalie Beams 40480a9ef05SNatalie Beams case CEED_EVAL_GRAD: { 40580a9ef05SNatalie Beams CeedInt P = ndof, Q = nqpt; 40680a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) { 40780a9ef05SNatalie Beams CeedScalar beta = 0.0; 40880a9ef05SNatalie Beams for(int d=0; d<dim; d++) { 40980a9ef05SNatalie Beams if (d>0) 41080a9ef05SNatalie Beams beta = 1.0; 41180a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 41280a9ef05SNatalie Beams P, nelem*ncomp, Q, 41380a9ef05SNatalie Beams 1.0, (float *)(impl->dgrad + d*P*Q), P, 41480a9ef05SNatalie Beams (float *)(du + d*nelem*ncomp*Q), Q, 41580a9ef05SNatalie Beams beta, (float *)dv, P, data->queue); 41680a9ef05SNatalie Beams } 41780a9ef05SNatalie Beams } else { 41880a9ef05SNatalie Beams for(int d=0; d< dim; d++) 41980a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans, 42080a9ef05SNatalie Beams Q, nelem*ncomp, P, 42180a9ef05SNatalie Beams 1.0, (float *)(impl->dgrad + d*P*Q), P, 42280a9ef05SNatalie Beams (float *)du, P, 42380a9ef05SNatalie Beams 0.0, (float *)(dv + d*nelem*ncomp*Q), Q, data->queue); 424868539c2SNatalie Beams } 425868539c2SNatalie Beams } 426868539c2SNatalie Beams break; 427868539c2SNatalie Beams 428868539c2SNatalie Beams case CEED_EVAL_WEIGHT: { 429868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 430868539c2SNatalie Beams // LCOV_EXCL_START 431e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 432868539c2SNatalie Beams "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 433868539c2SNatalie Beams // LCOV_EXCL_STOP 434868539c2SNatalie Beams 435868539c2SNatalie Beams int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 436868539c2SNatalie Beams int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 437868539c2SNatalie Beams 1 : 0 ); 438e0582403Sabdelfattah83 magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 439e0582403Sabdelfattah83 data->queue); 440e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 441868539c2SNatalie Beams } 442868539c2SNatalie Beams break; 443868539c2SNatalie Beams 444868539c2SNatalie Beams // LCOV_EXCL_START 445868539c2SNatalie Beams case CEED_EVAL_DIV: 446e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 447868539c2SNatalie Beams case CEED_EVAL_CURL: 448e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 449868539c2SNatalie Beams case CEED_EVAL_NONE: 450e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 451868539c2SNatalie Beams "CEED_EVAL_NONE does not make sense in this context"); 452868539c2SNatalie Beams // LCOV_EXCL_STOP 453868539c2SNatalie Beams } 454868539c2SNatalie Beams 455e0582403Sabdelfattah83 // must sync to ensure completeness 456e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 457e0582403Sabdelfattah83 458868539c2SNatalie Beams if (emode!=CEED_EVAL_WEIGHT) { 459e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr); 460868539c2SNatalie Beams } 461e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr); 462e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 463868539c2SNatalie Beams } 464868539c2SNatalie Beams 465868539c2SNatalie Beams #ifdef __cplusplus 466868539c2SNatalie Beams CEED_INTERN "C" 467868539c2SNatalie Beams #endif 4683513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) { 4697f5b9731SStan Tomov int ierr; 4707f5b9731SStan Tomov CeedBasis_Magma *impl; 471e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 4727f5b9731SStan Tomov 473e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqref1d); CeedChkBackend(ierr); 474e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr); 475e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dgrad1d); CeedChkBackend(ierr); 476e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr); 4777f5b9731SStan Tomov 478e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 4797f5b9731SStan Tomov 480e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4817f5b9731SStan Tomov } 4827f5b9731SStan Tomov 4837f5b9731SStan Tomov #ifdef __cplusplus 4847f5b9731SStan Tomov CEED_INTERN "C" 4857f5b9731SStan Tomov #endif 486868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) { 487868539c2SNatalie Beams int ierr; 488868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 489e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 490868539c2SNatalie Beams 491e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqref); CeedChkBackend(ierr); 492e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dinterp); CeedChkBackend(ierr); 493e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dgrad); CeedChkBackend(ierr); 494e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqweight); CeedChkBackend(ierr); 495868539c2SNatalie Beams 496e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 497868539c2SNatalie Beams 498e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 499868539c2SNatalie Beams } 500868539c2SNatalie Beams 501868539c2SNatalie Beams #ifdef __cplusplus 502868539c2SNatalie Beams CEED_INTERN "C" 503868539c2SNatalie Beams #endif 5043513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, 5053513a710Sjeremylt const CeedScalar *interp1d, 5067f5b9731SStan Tomov const CeedScalar *grad1d, 5077f5b9731SStan Tomov const CeedScalar *qref1d, 5083513a710Sjeremylt const CeedScalar *qweight1d, CeedBasis basis) { 5097f5b9731SStan Tomov int ierr; 5107f5b9731SStan Tomov CeedBasis_Magma *impl; 5117f5b9731SStan Tomov Ceed ceed; 512e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 5137f5b9731SStan Tomov 514c9f8acf2SJeremy L Thompson // Check for supported parameters 515c9f8acf2SJeremy L Thompson CeedInt ncomp = 0; 516e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 517c9f8acf2SJeremy L Thompson if (ncomp > 3) 518c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 519e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 520c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 3 components"); 521c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 522c9f8acf2SJeremy L Thompson if (P1d > 10) 523c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 524e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 525c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 10 nodes in each dimension"); 526c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 527c9f8acf2SJeremy L Thompson if (Q1d > 10) 528c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 529e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 530c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension"); 531c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 532c9f8acf2SJeremy L Thompson 533e0582403Sabdelfattah83 Ceed_Magma *data; 534e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 535e0582403Sabdelfattah83 5367f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 537e15f9bd0SJeremy L Thompson CeedBasisApply_Magma); CeedChkBackend(ierr); 5387f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 539e15f9bd0SJeremy L Thompson CeedBasisDestroy_Magma); CeedChkBackend(ierr); 5407f5b9731SStan Tomov 541e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 542e15f9bd0SJeremy L Thompson ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 5437f5b9731SStan Tomov 5447f5b9731SStan Tomov // Copy qref1d to the GPU 5457f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); 546e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 547e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1, 548e0582403Sabdelfattah83 data->queue); 5497f5b9731SStan Tomov 5507f5b9731SStan Tomov // Copy interp1d to the GPU 5517f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0])); 552e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 553e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1, 554e0582403Sabdelfattah83 data->queue); 5557f5b9731SStan Tomov 5567f5b9731SStan Tomov // Copy grad1d to the GPU 5577f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0])); 558e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 559e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1, 560e0582403Sabdelfattah83 data->queue); 5617f5b9731SStan Tomov 5627f5b9731SStan Tomov // Copy qweight1d to the GPU 5637f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0])); 564e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 565e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1, 566e0582403Sabdelfattah83 data->queue); 5677f5b9731SStan Tomov 568e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5697f5b9731SStan Tomov } 5707f5b9731SStan Tomov 5717f5b9731SStan Tomov #ifdef __cplusplus 5727f5b9731SStan Tomov CEED_INTERN "C" 5737f5b9731SStan Tomov #endif 5743513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, 5753513a710Sjeremylt CeedInt nqpts, const CeedScalar *interp, 5763513a710Sjeremylt const CeedScalar *grad, const CeedScalar *qref, 5773513a710Sjeremylt const CeedScalar *qweight, CeedBasis basis) { 5787f5b9731SStan Tomov int ierr; 579868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 5807f5b9731SStan Tomov Ceed ceed; 581e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 5827f5b9731SStan Tomov 583e0582403Sabdelfattah83 Ceed_Magma *data; 584e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 585e0582403Sabdelfattah83 58680a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) { 587868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 58880a9ef05SNatalie Beams CeedBasisApplyNonTensor_f64_Magma); 58980a9ef05SNatalie Beams CeedChkBackend(ierr); 59080a9ef05SNatalie Beams } else { 59180a9ef05SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 59280a9ef05SNatalie Beams CeedBasisApplyNonTensor_f32_Magma); 59380a9ef05SNatalie Beams CeedChkBackend(ierr); 59480a9ef05SNatalie Beams } 595868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 596e15f9bd0SJeremy L Thompson CeedBasisDestroyNonTensor_Magma); CeedChkBackend(ierr); 597868539c2SNatalie Beams 598e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 599e15f9bd0SJeremy L Thompson ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 600868539c2SNatalie Beams 601868539c2SNatalie Beams // Copy qref to the GPU 602868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0])); 603e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 604e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue); 605868539c2SNatalie Beams 606868539c2SNatalie Beams // Copy interp to the GPU 607868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0])); 608e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 609e0582403Sabdelfattah83 magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1, 610e0582403Sabdelfattah83 data->queue); 611868539c2SNatalie Beams 612868539c2SNatalie Beams // Copy grad to the GPU 613868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0])); 614e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 615e0582403Sabdelfattah83 magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1, 616e0582403Sabdelfattah83 data->queue); 617868539c2SNatalie Beams 618868539c2SNatalie Beams // Copy qweight to the GPU 619868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0])); 620e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 621e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1, 622e0582403Sabdelfattah83 data->queue); 623868539c2SNatalie Beams 624e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6257f5b9731SStan Tomov } 626