1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 34444f328STzanio // 4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 54444f328STzanio // 6*3d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 74444f328STzanio 890104f39SStan Tomov // magma functions specific to ceed 9972b3d9dSNatalie Beams #ifndef _ceed_magma_h 103d576824SJeremy L Thompson #define _ceed_magma_h 1190104f39SStan Tomov 12ec3da8bcSJed Brown #include <ceed/ceed.h> 13ec3da8bcSJed Brown #include <ceed/backend.h> 14e0582403Sabdelfattah83 #include <magma_v2.h> 15e0582403Sabdelfattah83 16e0582403Sabdelfattah83 typedef enum { 17e0582403Sabdelfattah83 MAGMA_KERNEL_DIM_GENERIC=101, 18e0582403Sabdelfattah83 MAGMA_KERNEL_DIM_SPECIFIC=102 19e0582403Sabdelfattah83 } magma_kernel_mode_t; 20e0582403Sabdelfattah83 21e0582403Sabdelfattah83 typedef struct { 22e0582403Sabdelfattah83 magma_kernel_mode_t basis_kernel_mode; 23e0582403Sabdelfattah83 magma_device_t device; 24e0582403Sabdelfattah83 magma_queue_t queue; 25e0582403Sabdelfattah83 } Ceed_Magma; 265a9ca9adSVeselin Dobrev 277f5b9731SStan Tomov typedef struct { 287f5b9731SStan Tomov CeedScalar *dqref1d; 297f5b9731SStan Tomov CeedScalar *dinterp1d; 307f5b9731SStan Tomov CeedScalar *dgrad1d; 317f5b9731SStan Tomov CeedScalar *dqweight1d; 327f5b9731SStan Tomov } CeedBasis_Magma; 337f5b9731SStan Tomov 347f5b9731SStan Tomov typedef struct { 35868539c2SNatalie Beams CeedScalar *dqref; 36868539c2SNatalie Beams CeedScalar *dinterp; 37868539c2SNatalie Beams CeedScalar *dgrad; 38868539c2SNatalie Beams CeedScalar *dqweight; 39868539c2SNatalie Beams } CeedBasisNonTensor_Magma; 40868539c2SNatalie Beams 41c8b3a627SJed Brown typedef enum { 42c8b3a627SJed Brown OWNED_NONE = 0, 43c8b3a627SJed Brown OWNED_UNPINNED, 44c8b3a627SJed Brown OWNED_PINNED, 45c8b3a627SJed Brown } OwnershipMode; 46c8b3a627SJed Brown 47868539c2SNatalie Beams typedef struct { 48d655899aSNatalie Beams CeedInt *offsets; 49d655899aSNatalie Beams CeedInt *doffsets; 50c8b3a627SJed Brown OwnershipMode own_; 51868539c2SNatalie Beams int down_; // cover a case where we own Device memory 52868539c2SNatalie Beams } CeedElemRestriction_Magma; 53868539c2SNatalie Beams 54868539c2SNatalie Beams typedef struct { 557f5b9731SStan Tomov const CeedScalar **inputs; 567f5b9731SStan Tomov CeedScalar **outputs; 577f5b9731SStan Tomov bool setupdone; 587f5b9731SStan Tomov } CeedQFunction_Magma; 597f5b9731SStan Tomov 6090104f39SStan Tomov #define USE_MAGMA_BATCH 6197ee337cSStan Tomov #define USE_MAGMA_BATCH2 627f5b9731SStan Tomov #define USE_MAGMA_BATCH3 637f5b9731SStan Tomov #define USE_MAGMA_BATCH4 6490104f39SStan Tomov 657f5b9731SStan Tomov #ifdef __cplusplus 667f5b9731SStan Tomov CEED_INTERN { 677f5b9731SStan Tomov #endif 68e0582403Sabdelfattah83 69e0582403Sabdelfattah83 magma_int_t magma_interp_1d( 70e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 71e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 72e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 73e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 74f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 75e0582403Sabdelfattah83 76e0582403Sabdelfattah83 magma_int_t magma_interp_2d( 77e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 78e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 79e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 80e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 81f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 82e0582403Sabdelfattah83 83e0582403Sabdelfattah83 magma_int_t magma_interp_3d( 84e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 85e0582403Sabdelfattah83 const CeedScalar *dT, CeedTransposeMode tmode, 86e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 87e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 88f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 89e0582403Sabdelfattah83 90e0582403Sabdelfattah83 magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q, 91868539c2SNatalie Beams magma_int_t dim, magma_int_t ncomp, 9280a9ef05SNatalie Beams const CeedScalar *dT, CeedTransposeMode tmode, 9380a9ef05SNatalie Beams const CeedScalar *dU, magma_int_t u_elemstride, 94e0582403Sabdelfattah83 magma_int_t cstrdU, 9580a9ef05SNatalie Beams CeedScalar *dV, magma_int_t v_elemstride, 96e0582403Sabdelfattah83 magma_int_t cstrdV, 97e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 987f5b9731SStan Tomov 99e0582403Sabdelfattah83 magma_int_t magma_interp( 100e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, 101868539c2SNatalie Beams magma_int_t dim, magma_int_t ncomp, 10280a9ef05SNatalie Beams const CeedScalar *dT, CeedTransposeMode tmode, 10380a9ef05SNatalie Beams const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 10480a9ef05SNatalie Beams CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 105f71aa81bSnbeams magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue); 1067f5b9731SStan Tomov 107e0582403Sabdelfattah83 magma_int_t magma_grad_1d( 108e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 109e0582403Sabdelfattah83 const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode, 110e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 111e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 112f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 113868539c2SNatalie Beams 114e0582403Sabdelfattah83 magma_int_t magma_gradn_2d( 115e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 116e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 117e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 118e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 119f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 120e0582403Sabdelfattah83 121e0582403Sabdelfattah83 magma_int_t magma_gradt_2d( 122e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 123e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 124e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 125e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 126f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 127e0582403Sabdelfattah83 128e0582403Sabdelfattah83 magma_int_t magma_gradn_3d( 129e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 130e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 131e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 132e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 133f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 134e0582403Sabdelfattah83 135e0582403Sabdelfattah83 magma_int_t magma_gradt_3d( 136e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 137e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 138e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 139e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 140f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 141e0582403Sabdelfattah83 142e0582403Sabdelfattah83 magma_int_t magma_grad_generic( 143e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 144e0582403Sabdelfattah83 const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 145e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 146e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 147e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 148e0582403Sabdelfattah83 149e0582403Sabdelfattah83 magma_int_t magma_grad( 150e0582403Sabdelfattah83 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 151e0582403Sabdelfattah83 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 152e0582403Sabdelfattah83 const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU, 153e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV, 154f71aa81bSnbeams magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue); 155e0582403Sabdelfattah83 156e0582403Sabdelfattah83 magma_int_t magma_weight_1d( 157e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 158e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 159f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 160e0582403Sabdelfattah83 161e0582403Sabdelfattah83 magma_int_t magma_weight_2d( 162e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 163e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 164f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 165e0582403Sabdelfattah83 166e0582403Sabdelfattah83 magma_int_t magma_weight_3d( 167e0582403Sabdelfattah83 magma_int_t Q, const CeedScalar *dqweight1d, 168e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 169f71aa81bSnbeams magma_int_t nelem, magma_queue_t queue); 170e0582403Sabdelfattah83 171e0582403Sabdelfattah83 magma_int_t magma_weight_generic( 172e0582403Sabdelfattah83 magma_int_t Q, magma_int_t dim, 173e0582403Sabdelfattah83 const CeedScalar *dqweight1d, 174e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t vstride, 175e0582403Sabdelfattah83 magma_int_t nelem, magma_queue_t queue); 176e0582403Sabdelfattah83 177e0582403Sabdelfattah83 magma_int_t magma_weight( 178e0582403Sabdelfattah83 magma_int_t Q, magma_int_t dim, 179e0582403Sabdelfattah83 const CeedScalar *dqweight1d, 180e0582403Sabdelfattah83 CeedScalar *dV, magma_int_t v_stride, 181f71aa81bSnbeams magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue); 182e0582403Sabdelfattah83 183e0582403Sabdelfattah83 void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem, 184868539c2SNatalie Beams magma_int_t Q, 18580a9ef05SNatalie Beams CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue); 186e0582403Sabdelfattah83 1878dc8d968Sjeremylt void magma_readDofsOffset(const magma_int_t NCOMP, 1888dc8d968Sjeremylt const magma_int_t compstride, 1898dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 19080a9ef05SNatalie Beams magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, 191e0582403Sabdelfattah83 magma_queue_t queue); 192868539c2SNatalie Beams 1938dc8d968Sjeremylt void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 194266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 19580a9ef05SNatalie Beams const CeedScalar *du, CeedScalar *dv, 196e0582403Sabdelfattah83 magma_queue_t queue); 197266dd7abSnbeams 1988dc8d968Sjeremylt void magma_writeDofsOffset(const magma_int_t NCOMP, 1998dc8d968Sjeremylt const magma_int_t compstride, 2008dc8d968Sjeremylt const magma_int_t esize, const magma_int_t nelem, 20180a9ef05SNatalie Beams magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv, 202e0582403Sabdelfattah83 magma_queue_t queue); 203868539c2SNatalie Beams 2048dc8d968Sjeremylt void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 205266dd7abSnbeams const magma_int_t nelem, magma_int_t *strides, 20680a9ef05SNatalie Beams const CeedScalar *du, CeedScalar *dv, 207e0582403Sabdelfattah83 magma_queue_t queue); 208e0582403Sabdelfattah83 209e0582403Sabdelfattah83 int magma_dgemm_nontensor( 210e0582403Sabdelfattah83 magma_trans_t transA, magma_trans_t transB, 211e0582403Sabdelfattah83 magma_int_t m, magma_int_t n, magma_int_t k, 212e0582403Sabdelfattah83 double alpha, const double *dA, magma_int_t ldda, 213e0582403Sabdelfattah83 const double *dB, magma_int_t lddb, 214e0582403Sabdelfattah83 double beta, double *dC, magma_int_t lddc, 215e0582403Sabdelfattah83 magma_queue_t queue ); 216e0582403Sabdelfattah83 21780a9ef05SNatalie Beams int magma_sgemm_nontensor( 21880a9ef05SNatalie Beams magma_trans_t transA, magma_trans_t transB, 21980a9ef05SNatalie Beams magma_int_t m, magma_int_t n, magma_int_t k, 22080a9ef05SNatalie Beams float alpha, const float *dA, magma_int_t ldda, 22180a9ef05SNatalie Beams const float *dB, magma_int_t lddb, 22280a9ef05SNatalie Beams float beta, float *dC, magma_int_t lddc, 22380a9ef05SNatalie Beams magma_queue_t queue ); 22480a9ef05SNatalie Beams 2257f5b9731SStan Tomov magma_int_t 2267f5b9731SStan Tomov magma_isdevptr(const void *A); 2277f5b9731SStan Tomov 228868539c2SNatalie Beams int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 229868539c2SNatalie Beams CeedInt Q1d, 230868539c2SNatalie Beams const CeedScalar *interp1d, 231868539c2SNatalie Beams const CeedScalar *grad1d, 232868539c2SNatalie Beams const CeedScalar *qref1d, 233868539c2SNatalie Beams const CeedScalar *qweight1d, 234868539c2SNatalie Beams CeedBasis basis); 2357f5b9731SStan Tomov 236868539c2SNatalie Beams int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 237d4f68153Sjeremylt CeedInt ndof, CeedInt nqpts, 238d4f68153Sjeremylt const CeedScalar *interp, 239d4f68153Sjeremylt const CeedScalar *grad, 240d4f68153Sjeremylt const CeedScalar *qref, 241d4f68153Sjeremylt const CeedScalar *qweight, 242d4f68153Sjeremylt CeedBasis basis); 243868539c2SNatalie Beams 244868539c2SNatalie Beams int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 245868539c2SNatalie Beams CeedCopyMode cmode, 246d655899aSNatalie Beams const CeedInt *offsets, 247868539c2SNatalie Beams CeedElemRestriction r); 248868539c2SNatalie Beams 249868539c2SNatalie Beams int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype, 250868539c2SNatalie Beams const CeedCopyMode cmode, 251d655899aSNatalie Beams const CeedInt *offsets, 252868539c2SNatalie Beams const CeedElemRestriction res); 253a8c028e3SNatalie Beams 254a8c028e3SNatalie Beams int CeedOperatorCreate_Magma(CeedOperator op); 255a8c028e3SNatalie Beams 2567f5b9731SStan Tomov #ifdef __cplusplus 2577f5b9731SStan Tomov } 2587f5b9731SStan Tomov #endif 2597f5b9731SStan Tomov 2607f5b9731SStan Tomov // comment the line below to use the default magma_is_devptr function 2617f5b9731SStan Tomov #define magma_is_devptr magma_isdevptr 2627f5b9731SStan Tomov 263e0582403Sabdelfattah83 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync 264e0582403Sabdelfattah83 // should do nothing 265e0582403Sabdelfattah83 #define ceed_magma_queue_sync(...) 266e0582403Sabdelfattah83 2677f5b9731SStan Tomov // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value> 2687f5b9731SStan Tomov #ifndef MAGMA_BATCH_STRIDE 2697f5b9731SStan Tomov #define MAGMA_BATCH_STRIDE (1000) 2707f5b9731SStan Tomov #endif 271e0582403Sabdelfattah83 2723d576824SJeremy L Thompson #endif // _ceed_magma_h 273