1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 37f5b9731SStan Tomov // 4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 57f5b9731SStan Tomov // 6*3d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 77f5b9731SStan Tomov 8ec3da8bcSJed Brown #include <ceed/ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 107f5b9731SStan Tomov #include "ceed-magma.h" 117f5b9731SStan Tomov 127f5b9731SStan Tomov #ifdef __cplusplus 137f5b9731SStan Tomov CEED_INTERN "C" 147f5b9731SStan Tomov #endif 157f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem, 167f5b9731SStan Tomov CeedTransposeMode tmode, CeedEvalMode emode, 173513a710Sjeremylt CeedVector U, CeedVector V) { 187f5b9731SStan Tomov int ierr; 197f5b9731SStan Tomov Ceed ceed; 20e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 21e0582403Sabdelfattah83 CeedInt dim, ncomp, ndof; 22e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 23e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 24e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 25e0582403Sabdelfattah83 26e0582403Sabdelfattah83 Ceed_Magma *data; 27e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 28e0582403Sabdelfattah83 297f5b9731SStan Tomov const CeedScalar *u; 307f5b9731SStan Tomov CeedScalar *v; 31868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 32e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChkBackend(ierr); 337f5b9731SStan Tomov } else if (emode != CEED_EVAL_WEIGHT) { 347f5b9731SStan Tomov // LCOV_EXCL_START 35e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 367f5b9731SStan Tomov "An input vector is required for this CeedEvalMode"); 377f5b9731SStan Tomov // LCOV_EXCL_STOP 387f5b9731SStan Tomov } 399c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr); 407f5b9731SStan Tomov 417f5b9731SStan Tomov CeedBasis_Magma *impl; 42e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 437f5b9731SStan Tomov 447f5b9731SStan Tomov CeedInt P1d, Q1d; 45e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr); 46e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr); 477f5b9731SStan Tomov 483f21f6b1SJeremy L Thompson CeedDebug(ceed, "\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d", 497f5b9731SStan Tomov ncomp*CeedIntPow(P1d, dim), ncomp); 507f5b9731SStan Tomov 517f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 521f9221feSJeremy L Thompson CeedSize length; 53e15f9bd0SJeremy L Thompson ierr = CeedVectorGetLength(V, &length); CeedChkBackend(ierr); 5480a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 5580a9ef05SNatalie Beams magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) v, length, 5680a9ef05SNatalie Beams data->queue); 5780a9ef05SNatalie Beams } else { 5880a9ef05SNatalie Beams magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) v, length, 5980a9ef05SNatalie Beams data->queue); 6080a9ef05SNatalie Beams } 61e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 627f5b9731SStan Tomov } 633513a710Sjeremylt switch (emode) { 643513a710Sjeremylt case CEED_EVAL_INTERP: { 657f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 667f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 677f5b9731SStan Tomov P = Q1d; Q = P1d; 687f5b9731SStan Tomov } 697f5b9731SStan Tomov 707f5b9731SStan Tomov // Define element sizes for dofs/quad 717f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 727f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 737f5b9731SStan Tomov 747f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 75868539c2SNatalie Beams // component component 76868539c2SNatalie Beams // elem elem 777f5b9731SStan Tomov // node node 787f5b9731SStan Tomov 797f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 807f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 817f5b9731SStan Tomov 827f5b9731SStan Tomov // Element strides 83868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 847f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 857f5b9731SStan Tomov // Component strides 86868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 877f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 887f5b9731SStan Tomov 897f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 907f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 917f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 927f5b9731SStan Tomov // Element strides 93868539c2SNatalie Beams v_elstride = eldofssize; 947f5b9731SStan Tomov u_elstride = elquadsize; 957f5b9731SStan Tomov // Component strides 96868539c2SNatalie Beams v_compstride = nelem * eldofssize; 977f5b9731SStan Tomov u_compstride = nelem * elquadsize; 987f5b9731SStan Tomov } 997f5b9731SStan Tomov 100e0582403Sabdelfattah83 ierr = magma_interp(P, Q, dim, ncomp, 1017f5b9731SStan Tomov impl->dinterp1d, tmode, 102868539c2SNatalie Beams u, u_elstride, u_compstride, 103868539c2SNatalie Beams v, v_elstride, v_compstride, 104f71aa81bSnbeams nelem, data->basis_kernel_mode, 105e0582403Sabdelfattah83 data->queue); 10642e449dbSjeremylt if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 107e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_interp"); 1087f5b9731SStan Tomov } 1093513a710Sjeremylt break; 1103513a710Sjeremylt case CEED_EVAL_GRAD: { 1117f5b9731SStan Tomov CeedInt P = P1d, Q = Q1d; 1127f5b9731SStan Tomov // In CEED_NOTRANSPOSE mode: 1137f5b9731SStan Tomov // u is (P^dim x nc), column-major layout (nc = ncomp) 1147f5b9731SStan Tomov // v is (Q^dim x nc x dim), column-major layout (nc = ncomp) 1157f5b9731SStan Tomov // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 1167f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1177f5b9731SStan Tomov P = Q1d, Q = P1d; 1187f5b9731SStan Tomov } 1197f5b9731SStan Tomov 1207f5b9731SStan Tomov // Define element sizes for dofs/quad 1217f5b9731SStan Tomov CeedInt elquadsize = CeedIntPow(Q1d, dim); 1227f5b9731SStan Tomov CeedInt eldofssize = CeedIntPow(P1d, dim); 1237f5b9731SStan Tomov 1247f5b9731SStan Tomov // E-vector ordering -------------- Q-vector ordering 1257f5b9731SStan Tomov // dim 126868539c2SNatalie Beams // component component 127868539c2SNatalie Beams // elem elem 1287f5b9731SStan Tomov // node node 1297f5b9731SStan Tomov 1307f5b9731SStan Tomov // --- Define strides for NOTRANSPOSE mode: --- 1317f5b9731SStan Tomov // Input (u) is E-vector, output (v) is Q-vector 1327f5b9731SStan Tomov 1337f5b9731SStan Tomov // Element strides 134868539c2SNatalie Beams CeedInt u_elstride = eldofssize; 1357f5b9731SStan Tomov CeedInt v_elstride = elquadsize; 1367f5b9731SStan Tomov // Component strides 137868539c2SNatalie Beams CeedInt u_compstride = nelem * eldofssize; 1387f5b9731SStan Tomov CeedInt v_compstride = nelem * elquadsize; 1397f5b9731SStan Tomov // Dimension strides 1407f5b9731SStan Tomov CeedInt u_dimstride = 0; 1417f5b9731SStan Tomov CeedInt v_dimstride = nelem * elquadsize * ncomp; 1427f5b9731SStan Tomov 1437f5b9731SStan Tomov // --- Swap strides for TRANSPOSE mode: --- 1447f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) { 1457f5b9731SStan Tomov // Input (u) is Q-vector, output (v) is E-vector 1467f5b9731SStan Tomov // Element strides 147868539c2SNatalie Beams v_elstride = eldofssize; 1487f5b9731SStan Tomov u_elstride = elquadsize; 1497f5b9731SStan Tomov // Component strides 150868539c2SNatalie Beams v_compstride = nelem * eldofssize; 1517f5b9731SStan Tomov u_compstride = nelem * elquadsize; 1527f5b9731SStan Tomov // Dimension strides 1537f5b9731SStan Tomov v_dimstride = 0; 1547f5b9731SStan Tomov u_dimstride = nelem * elquadsize * ncomp; 1557f5b9731SStan Tomov 1567f5b9731SStan Tomov } 1577f5b9731SStan Tomov 158e0582403Sabdelfattah83 ierr = magma_grad( P, Q, dim, ncomp, 1597f5b9731SStan Tomov impl->dinterp1d, impl->dgrad1d, tmode, 160e0582403Sabdelfattah83 u, u_elstride, u_compstride, u_dimstride, 161e0582403Sabdelfattah83 v, v_elstride, v_compstride, v_dimstride, 162f71aa81bSnbeams nelem, data->basis_kernel_mode, 163e0582403Sabdelfattah83 data->queue); 16442e449dbSjeremylt if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 165e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_grad"); 1667f5b9731SStan Tomov } 1673513a710Sjeremylt break; 1683513a710Sjeremylt case CEED_EVAL_WEIGHT: { 1697f5b9731SStan Tomov if (tmode == CEED_TRANSPOSE) 1707f5b9731SStan Tomov // LCOV_EXCL_START 171e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1727f5b9731SStan Tomov "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 1737f5b9731SStan Tomov // LCOV_EXCL_STOP 1747f5b9731SStan Tomov CeedInt Q = Q1d; 1757f5b9731SStan Tomov int eldofssize = CeedIntPow(Q, dim); 176e0582403Sabdelfattah83 ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem, 177f71aa81bSnbeams data->basis_kernel_mode, data->queue); 17842e449dbSjeremylt if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND, 179e0582403Sabdelfattah83 "MAGMA: launch failure detected for magma_weight"); 1807f5b9731SStan Tomov } 1813513a710Sjeremylt break; 1823513a710Sjeremylt // LCOV_EXCL_START 1833513a710Sjeremylt case CEED_EVAL_DIV: 184e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 1853513a710Sjeremylt case CEED_EVAL_CURL: 186e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 1873513a710Sjeremylt case CEED_EVAL_NONE: 188e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 1893513a710Sjeremylt "CEED_EVAL_NONE does not make sense in this context"); 1903513a710Sjeremylt // LCOV_EXCL_STOP 1913513a710Sjeremylt } 1927f5b9731SStan Tomov 193e0582403Sabdelfattah83 // must sync to ensure completeness 194e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 195e0582403Sabdelfattah83 1967f5b9731SStan Tomov if (emode!=CEED_EVAL_WEIGHT) { 197e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U, &u); CeedChkBackend(ierr); 1987f5b9731SStan Tomov } 199e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(V, &v); CeedChkBackend(ierr); 200e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2017f5b9731SStan Tomov } 2027f5b9731SStan Tomov 2037f5b9731SStan Tomov #ifdef __cplusplus 2047f5b9731SStan Tomov CEED_INTERN "C" 2057f5b9731SStan Tomov #endif 20680a9ef05SNatalie Beams int CeedBasisApplyNonTensor_f64_Magma(CeedBasis basis, CeedInt nelem, 207868539c2SNatalie Beams CeedTransposeMode tmode, CeedEvalMode emode, 208868539c2SNatalie Beams CeedVector U, CeedVector V) { 209868539c2SNatalie Beams int ierr; 210868539c2SNatalie Beams Ceed ceed; 211e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 212e0582403Sabdelfattah83 213e0582403Sabdelfattah83 Ceed_Magma *data; 214e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 215e0582403Sabdelfattah83 216868539c2SNatalie Beams CeedInt dim, ncomp, ndof, nqpt; 217e15f9bd0SJeremy L Thompson ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 218e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 219e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 220e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr); 221868539c2SNatalie Beams const CeedScalar *du; 222868539c2SNatalie Beams CeedScalar *dv; 223868539c2SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 224e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr); 225868539c2SNatalie Beams } else if (emode != CEED_EVAL_WEIGHT) { 226868539c2SNatalie Beams // LCOV_EXCL_START 227e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 228868539c2SNatalie Beams "An input vector is required for this CeedEvalMode"); 229868539c2SNatalie Beams // LCOV_EXCL_STOP 230868539c2SNatalie Beams } 2319c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); 232868539c2SNatalie Beams 233868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 234e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 235868539c2SNatalie Beams 2363f21f6b1SJeremy L Thompson CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 237868539c2SNatalie Beams ncomp*ndof, ncomp); 238868539c2SNatalie Beams 239868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 2401f9221feSJeremy L Thompson CeedSize length; 241868539c2SNatalie Beams ierr = CeedVectorGetLength(V, &length); 24280a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 24380a9ef05SNatalie Beams magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length, 24480a9ef05SNatalie Beams data->queue); 24580a9ef05SNatalie Beams } else { 24680a9ef05SNatalie Beams magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length, 24780a9ef05SNatalie Beams data->queue); 24880a9ef05SNatalie Beams } 249e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 250868539c2SNatalie Beams } 25180a9ef05SNatalie Beams 252868539c2SNatalie Beams switch (emode) { 253868539c2SNatalie Beams case CEED_EVAL_INTERP: { 254868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 255868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 256e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 257868539c2SNatalie Beams P, nelem*ncomp, Q, 25880a9ef05SNatalie Beams 1.0, (double *)impl->dinterp, P, 25980a9ef05SNatalie Beams (double *)du, Q, 26080a9ef05SNatalie Beams 0.0, (double *)dv, P, data->queue); 261868539c2SNatalie Beams else 262e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 263868539c2SNatalie Beams Q, nelem*ncomp, P, 26480a9ef05SNatalie Beams 1.0, (double *)impl->dinterp, P, 26580a9ef05SNatalie Beams (double *)du, P, 26680a9ef05SNatalie Beams 0.0, (double *)dv, Q, data->queue); 267868539c2SNatalie Beams } 268868539c2SNatalie Beams break; 269868539c2SNatalie Beams 270868539c2SNatalie Beams case CEED_EVAL_GRAD: { 271868539c2SNatalie Beams CeedInt P = ndof, Q = nqpt; 272868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) { 27380a9ef05SNatalie Beams CeedScalar beta = 0.0; 274868539c2SNatalie Beams for(int d=0; d<dim; d++) { 275868539c2SNatalie Beams if (d>0) 276868539c2SNatalie Beams beta = 1.0; 277e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 278868539c2SNatalie Beams P, nelem*ncomp, Q, 27980a9ef05SNatalie Beams 1.0, (double *)(impl->dgrad + d*P*Q), P, 28080a9ef05SNatalie Beams (double *)(du + d*nelem*ncomp*Q), Q, 28180a9ef05SNatalie Beams beta, (double *)dv, P, data->queue); 282868539c2SNatalie Beams } 283868539c2SNatalie Beams } else { 284868539c2SNatalie Beams for(int d=0; d< dim; d++) 285e0582403Sabdelfattah83 magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans, 286868539c2SNatalie Beams Q, nelem*ncomp, P, 28780a9ef05SNatalie Beams 1.0, (double *)(impl->dgrad + d*P*Q), P, 28880a9ef05SNatalie Beams (double *)du, P, 28980a9ef05SNatalie Beams 0.0, (double *)(dv + d*nelem*ncomp*Q), Q, data->queue); 29080a9ef05SNatalie Beams } 29180a9ef05SNatalie Beams } 29280a9ef05SNatalie Beams break; 29380a9ef05SNatalie Beams 29480a9ef05SNatalie Beams case CEED_EVAL_WEIGHT: { 29580a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) 29680a9ef05SNatalie Beams // LCOV_EXCL_START 29780a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, 29880a9ef05SNatalie Beams "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 29980a9ef05SNatalie Beams // LCOV_EXCL_STOP 30080a9ef05SNatalie Beams 30180a9ef05SNatalie Beams int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 30280a9ef05SNatalie Beams int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 30380a9ef05SNatalie Beams 1 : 0 ); 30480a9ef05SNatalie Beams magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 30580a9ef05SNatalie Beams data->queue); 30680a9ef05SNatalie Beams CeedChkBackend(ierr); 30780a9ef05SNatalie Beams } 30880a9ef05SNatalie Beams break; 30980a9ef05SNatalie Beams 31080a9ef05SNatalie Beams // LCOV_EXCL_START 31180a9ef05SNatalie Beams case CEED_EVAL_DIV: 31280a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 31380a9ef05SNatalie Beams case CEED_EVAL_CURL: 31480a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 31580a9ef05SNatalie Beams case CEED_EVAL_NONE: 31680a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, 31780a9ef05SNatalie Beams "CEED_EVAL_NONE does not make sense in this context"); 31880a9ef05SNatalie Beams // LCOV_EXCL_STOP 31980a9ef05SNatalie Beams } 32080a9ef05SNatalie Beams 32180a9ef05SNatalie Beams // must sync to ensure completeness 32280a9ef05SNatalie Beams ceed_magma_queue_sync( data->queue ); 32380a9ef05SNatalie Beams 32480a9ef05SNatalie Beams if (emode!=CEED_EVAL_WEIGHT) { 32580a9ef05SNatalie Beams ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr); 32680a9ef05SNatalie Beams } 32780a9ef05SNatalie Beams ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr); 32880a9ef05SNatalie Beams return CEED_ERROR_SUCCESS; 32980a9ef05SNatalie Beams } 33080a9ef05SNatalie Beams 33180a9ef05SNatalie Beams int CeedBasisApplyNonTensor_f32_Magma(CeedBasis basis, CeedInt nelem, 33280a9ef05SNatalie Beams CeedTransposeMode tmode, CeedEvalMode emode, 33380a9ef05SNatalie Beams CeedVector U, CeedVector V) { 33480a9ef05SNatalie Beams int ierr; 33580a9ef05SNatalie Beams Ceed ceed; 33680a9ef05SNatalie Beams ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 33780a9ef05SNatalie Beams 33880a9ef05SNatalie Beams Ceed_Magma *data; 33980a9ef05SNatalie Beams ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 34080a9ef05SNatalie Beams 34180a9ef05SNatalie Beams CeedInt dim, ncomp, ndof, nqpt; 34280a9ef05SNatalie Beams ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 34380a9ef05SNatalie Beams ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 34480a9ef05SNatalie Beams ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr); 34580a9ef05SNatalie Beams ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr); 34680a9ef05SNatalie Beams const CeedScalar *du; 34780a9ef05SNatalie Beams CeedScalar *dv; 34880a9ef05SNatalie Beams if (emode != CEED_EVAL_WEIGHT) { 34980a9ef05SNatalie Beams ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr); 35080a9ef05SNatalie Beams } else if (emode != CEED_EVAL_WEIGHT) { 35180a9ef05SNatalie Beams // LCOV_EXCL_START 35280a9ef05SNatalie Beams return CeedError(ceed, CEED_ERROR_BACKEND, 35380a9ef05SNatalie Beams "An input vector is required for this CeedEvalMode"); 35480a9ef05SNatalie Beams // LCOV_EXCL_STOP 35580a9ef05SNatalie Beams } 3569c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr); 35780a9ef05SNatalie Beams 35880a9ef05SNatalie Beams CeedBasisNonTensor_Magma *impl; 35980a9ef05SNatalie Beams ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 36080a9ef05SNatalie Beams 3613f21f6b1SJeremy L Thompson CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d", 36280a9ef05SNatalie Beams ncomp*ndof, ncomp); 36380a9ef05SNatalie Beams 36480a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) { 3651f9221feSJeremy L Thompson CeedSize length; 36680a9ef05SNatalie Beams ierr = CeedVectorGetLength(V, &length); 36780a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) { 36880a9ef05SNatalie Beams magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length, 36980a9ef05SNatalie Beams data->queue); 37080a9ef05SNatalie Beams } else { 37180a9ef05SNatalie Beams magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length, 37280a9ef05SNatalie Beams data->queue); 37380a9ef05SNatalie Beams } 37480a9ef05SNatalie Beams ceed_magma_queue_sync( data->queue ); 37580a9ef05SNatalie Beams } 37680a9ef05SNatalie Beams 37780a9ef05SNatalie Beams switch (emode) { 37880a9ef05SNatalie Beams case CEED_EVAL_INTERP: { 37980a9ef05SNatalie Beams CeedInt P = ndof, Q = nqpt; 38080a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) 38180a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 38280a9ef05SNatalie Beams P, nelem*ncomp, Q, 38380a9ef05SNatalie Beams 1.0, (float *)impl->dinterp, P, 38480a9ef05SNatalie Beams (float *)du, Q, 38580a9ef05SNatalie Beams 0.0, (float *)dv, P, data->queue); 38680a9ef05SNatalie Beams else 38780a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans, 38880a9ef05SNatalie Beams Q, nelem*ncomp, P, 38980a9ef05SNatalie Beams 1.0, (float *)impl->dinterp, P, 39080a9ef05SNatalie Beams (float *)du, P, 39180a9ef05SNatalie Beams 0.0, (float *)dv, Q, data->queue); 39280a9ef05SNatalie Beams } 39380a9ef05SNatalie Beams break; 39480a9ef05SNatalie Beams 39580a9ef05SNatalie Beams case CEED_EVAL_GRAD: { 39680a9ef05SNatalie Beams CeedInt P = ndof, Q = nqpt; 39780a9ef05SNatalie Beams if (tmode == CEED_TRANSPOSE) { 39880a9ef05SNatalie Beams CeedScalar beta = 0.0; 39980a9ef05SNatalie Beams for(int d=0; d<dim; d++) { 40080a9ef05SNatalie Beams if (d>0) 40180a9ef05SNatalie Beams beta = 1.0; 40280a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans, 40380a9ef05SNatalie Beams P, nelem*ncomp, Q, 40480a9ef05SNatalie Beams 1.0, (float *)(impl->dgrad + d*P*Q), P, 40580a9ef05SNatalie Beams (float *)(du + d*nelem*ncomp*Q), Q, 40680a9ef05SNatalie Beams beta, (float *)dv, P, data->queue); 40780a9ef05SNatalie Beams } 40880a9ef05SNatalie Beams } else { 40980a9ef05SNatalie Beams for(int d=0; d< dim; d++) 41080a9ef05SNatalie Beams magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans, 41180a9ef05SNatalie Beams Q, nelem*ncomp, P, 41280a9ef05SNatalie Beams 1.0, (float *)(impl->dgrad + d*P*Q), P, 41380a9ef05SNatalie Beams (float *)du, P, 41480a9ef05SNatalie Beams 0.0, (float *)(dv + d*nelem*ncomp*Q), Q, data->queue); 415868539c2SNatalie Beams } 416868539c2SNatalie Beams } 417868539c2SNatalie Beams break; 418868539c2SNatalie Beams 419868539c2SNatalie Beams case CEED_EVAL_WEIGHT: { 420868539c2SNatalie Beams if (tmode == CEED_TRANSPOSE) 421868539c2SNatalie Beams // LCOV_EXCL_START 422e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 423868539c2SNatalie Beams "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 424868539c2SNatalie Beams // LCOV_EXCL_STOP 425868539c2SNatalie Beams 426868539c2SNatalie Beams int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1; 427868539c2SNatalie Beams int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)? 428868539c2SNatalie Beams 1 : 0 ); 429e0582403Sabdelfattah83 magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv, 430e0582403Sabdelfattah83 data->queue); 431e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 432868539c2SNatalie Beams } 433868539c2SNatalie Beams break; 434868539c2SNatalie Beams 435868539c2SNatalie Beams // LCOV_EXCL_START 436868539c2SNatalie Beams case CEED_EVAL_DIV: 437e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 438868539c2SNatalie Beams case CEED_EVAL_CURL: 439e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 440868539c2SNatalie Beams case CEED_EVAL_NONE: 441e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 442868539c2SNatalie Beams "CEED_EVAL_NONE does not make sense in this context"); 443868539c2SNatalie Beams // LCOV_EXCL_STOP 444868539c2SNatalie Beams } 445868539c2SNatalie Beams 446e0582403Sabdelfattah83 // must sync to ensure completeness 447e0582403Sabdelfattah83 ceed_magma_queue_sync( data->queue ); 448e0582403Sabdelfattah83 449868539c2SNatalie Beams if (emode!=CEED_EVAL_WEIGHT) { 450e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr); 451868539c2SNatalie Beams } 452e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr); 453e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 454868539c2SNatalie Beams } 455868539c2SNatalie Beams 456868539c2SNatalie Beams #ifdef __cplusplus 457868539c2SNatalie Beams CEED_INTERN "C" 458868539c2SNatalie Beams #endif 4593513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) { 4607f5b9731SStan Tomov int ierr; 4617f5b9731SStan Tomov CeedBasis_Magma *impl; 462e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 4637f5b9731SStan Tomov 464e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqref1d); CeedChkBackend(ierr); 465e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr); 466e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dgrad1d); CeedChkBackend(ierr); 467e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr); 4687f5b9731SStan Tomov 469e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 4707f5b9731SStan Tomov 471e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4727f5b9731SStan Tomov } 4737f5b9731SStan Tomov 4747f5b9731SStan Tomov #ifdef __cplusplus 4757f5b9731SStan Tomov CEED_INTERN "C" 4767f5b9731SStan Tomov #endif 477868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) { 478868539c2SNatalie Beams int ierr; 479868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 480e15f9bd0SJeremy L Thompson ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr); 481868539c2SNatalie Beams 482e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqref); CeedChkBackend(ierr); 483e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dinterp); CeedChkBackend(ierr); 484e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dgrad); CeedChkBackend(ierr); 485e15f9bd0SJeremy L Thompson ierr = magma_free(impl->dqweight); CeedChkBackend(ierr); 486868539c2SNatalie Beams 487e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 488868539c2SNatalie Beams 489e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 490868539c2SNatalie Beams } 491868539c2SNatalie Beams 492868539c2SNatalie Beams #ifdef __cplusplus 493868539c2SNatalie Beams CEED_INTERN "C" 494868539c2SNatalie Beams #endif 4953513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d, 4963513a710Sjeremylt const CeedScalar *interp1d, 4977f5b9731SStan Tomov const CeedScalar *grad1d, 4987f5b9731SStan Tomov const CeedScalar *qref1d, 4993513a710Sjeremylt const CeedScalar *qweight1d, CeedBasis basis) { 5007f5b9731SStan Tomov int ierr; 5017f5b9731SStan Tomov CeedBasis_Magma *impl; 5027f5b9731SStan Tomov Ceed ceed; 503e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 5047f5b9731SStan Tomov 505c9f8acf2SJeremy L Thompson // Check for supported parameters 506c9f8acf2SJeremy L Thompson CeedInt ncomp = 0; 507e15f9bd0SJeremy L Thompson ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 508c9f8acf2SJeremy L Thompson if (ncomp > 3) 509c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 510e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 511c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 3 components"); 512c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 513c9f8acf2SJeremy L Thompson if (P1d > 10) 514c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 515e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 516c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 10 nodes in each dimension"); 517c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 518c9f8acf2SJeremy L Thompson if (Q1d > 10) 519c9f8acf2SJeremy L Thompson // LCOV_EXCL_START 520e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 521c9f8acf2SJeremy L Thompson "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension"); 522c9f8acf2SJeremy L Thompson // LCOV_EXCL_STOP 523c9f8acf2SJeremy L Thompson 524e0582403Sabdelfattah83 Ceed_Magma *data; 525e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 526e0582403Sabdelfattah83 5277f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 528e15f9bd0SJeremy L Thompson CeedBasisApply_Magma); CeedChkBackend(ierr); 5297f5b9731SStan Tomov ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 530e15f9bd0SJeremy L Thompson CeedBasisDestroy_Magma); CeedChkBackend(ierr); 5317f5b9731SStan Tomov 532e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 533e15f9bd0SJeremy L Thompson ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 5347f5b9731SStan Tomov 5357f5b9731SStan Tomov // Copy qref1d to the GPU 5367f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0])); 537e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 538e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1, 539e0582403Sabdelfattah83 data->queue); 5407f5b9731SStan Tomov 5417f5b9731SStan Tomov // Copy interp1d to the GPU 5427f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0])); 543e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 544e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1, 545e0582403Sabdelfattah83 data->queue); 5467f5b9731SStan Tomov 5477f5b9731SStan Tomov // Copy grad1d to the GPU 5487f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0])); 549e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 550e0582403Sabdelfattah83 magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1, 551e0582403Sabdelfattah83 data->queue); 5527f5b9731SStan Tomov 5537f5b9731SStan Tomov // Copy qweight1d to the GPU 5547f5b9731SStan Tomov ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0])); 555e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 556e0582403Sabdelfattah83 magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1, 557e0582403Sabdelfattah83 data->queue); 5587f5b9731SStan Tomov 559e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5607f5b9731SStan Tomov } 5617f5b9731SStan Tomov 5627f5b9731SStan Tomov #ifdef __cplusplus 5637f5b9731SStan Tomov CEED_INTERN "C" 5647f5b9731SStan Tomov #endif 5653513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof, 5663513a710Sjeremylt CeedInt nqpts, const CeedScalar *interp, 5673513a710Sjeremylt const CeedScalar *grad, const CeedScalar *qref, 5683513a710Sjeremylt const CeedScalar *qweight, CeedBasis basis) { 5697f5b9731SStan Tomov int ierr; 570868539c2SNatalie Beams CeedBasisNonTensor_Magma *impl; 5717f5b9731SStan Tomov Ceed ceed; 572e15f9bd0SJeremy L Thompson ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 5737f5b9731SStan Tomov 574e0582403Sabdelfattah83 Ceed_Magma *data; 575e15f9bd0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 576e0582403Sabdelfattah83 57780a9ef05SNatalie Beams if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) { 578868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 57980a9ef05SNatalie Beams CeedBasisApplyNonTensor_f64_Magma); 58080a9ef05SNatalie Beams CeedChkBackend(ierr); 58180a9ef05SNatalie Beams } else { 58280a9ef05SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 58380a9ef05SNatalie Beams CeedBasisApplyNonTensor_f32_Magma); 58480a9ef05SNatalie Beams CeedChkBackend(ierr); 58580a9ef05SNatalie Beams } 586868539c2SNatalie Beams ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 587e15f9bd0SJeremy L Thompson CeedBasisDestroyNonTensor_Magma); CeedChkBackend(ierr); 588868539c2SNatalie Beams 589e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr); 590e15f9bd0SJeremy L Thompson ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr); 591868539c2SNatalie Beams 592868539c2SNatalie Beams // Copy qref to the GPU 593868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0])); 594e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 595e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue); 596868539c2SNatalie Beams 597868539c2SNatalie Beams // Copy interp to the GPU 598868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0])); 599e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 600e0582403Sabdelfattah83 magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1, 601e0582403Sabdelfattah83 data->queue); 602868539c2SNatalie Beams 603868539c2SNatalie Beams // Copy grad to the GPU 604868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0])); 605e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 606e0582403Sabdelfattah83 magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1, 607e0582403Sabdelfattah83 data->queue); 608868539c2SNatalie Beams 609868539c2SNatalie Beams // Copy qweight to the GPU 610868539c2SNatalie Beams ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0])); 611e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 612e0582403Sabdelfattah83 magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1, 613e0582403Sabdelfattah83 data->queue); 614868539c2SNatalie Beams 615e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6167f5b9731SStan Tomov } 617