xref: /libCEED/backends/magma/ceed-magma-basis.c (revision cb32e2e7f026784d97a57f1901677e9727def907)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-magma.h"
18 
19 #ifdef __cplusplus
20 CEED_INTERN "C"
21 #endif
22 int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
23                          CeedTransposeMode tmode, CeedEvalMode emode,
24                          CeedVector U, CeedVector V) {
25   int ierr;
26   Ceed ceed;
27   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
28   CeedInt dim, ncomp, ndof, nqpt;
29   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
30   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
31   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
32   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
33   const CeedScalar *u;
34   CeedScalar *v;
35   if (U) {
36     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
37   } else if (emode != CEED_EVAL_WEIGHT) {
38     // LCOV_EXCL_START
39     return CeedError(ceed, 1,
40                      "An input vector is required for this CeedEvalMode");
41     // LCOV_EXCL_STOP
42   }
43   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
44 
45   CeedBasis_Magma *impl;
46   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
47 
48   CeedInt P1d, Q1d;
49   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
50   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
51 
52   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
53             ncomp*CeedIntPow(P1d, dim), ncomp);
54 
55   if (tmode == CEED_TRANSPOSE) {
56     CeedInt length;
57     ierr = CeedVectorGetLength(V, &length);
58     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length);
59   }
60   switch (emode) {
61   case CEED_EVAL_INTERP: {
62     CeedInt P = P1d, Q = Q1d;
63     if (tmode == CEED_TRANSPOSE) {
64       P = Q1d; Q = P1d;
65     }
66 
67     // Define element sizes for dofs/quad
68     CeedInt elquadsize = CeedIntPow(Q1d, dim);
69     CeedInt eldofssize = CeedIntPow(P1d, dim);
70 
71     // E-vector ordering -------------- Q-vector ordering
72     //  elem                            component
73     //    component                        elem
74     //       node                            node
75 
76     // ---  Define strides for NOTRANSPOSE mode: ---
77     // Input (u) is E-vector, output (v) is Q-vector
78 
79     // Element strides
80     CeedInt u_elstride = ncomp * eldofssize;
81     CeedInt v_elstride = elquadsize;
82     // Component strides
83     CeedInt u_compstride = eldofssize;
84     CeedInt v_compstride = nelem * elquadsize;
85 
86     // ---  Swap strides for TRANSPOSE mode: ---
87     if (tmode == CEED_TRANSPOSE) {
88       // Input (u) is Q-vector, output (v) is E-vector
89       // Element strides
90       v_elstride = ncomp * eldofssize;
91       u_elstride = elquadsize;
92       // Component strides
93       v_compstride = eldofssize;
94       u_compstride = nelem * elquadsize;
95     }
96 
97     // Loop through components and apply batch over elements
98     for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++)
99       magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp,
100           impl->dinterp1d, tmode,
101           u + u_compstride * comp_ctr, u_elstride,
102           v + v_compstride * comp_ctr, v_elstride,
103           nelem);
104   }
105   break;
106   case CEED_EVAL_GRAD: {
107     CeedInt P = P1d, Q = Q1d;
108     // In CEED_NOTRANSPOSE mode:
109     // u is (P^dim x nc), column-major layout (nc = ncomp)
110     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
111     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
112     if (tmode == CEED_TRANSPOSE) {
113       P = Q1d, Q = P1d;
114     }
115 
116     // Define element sizes for dofs/quad
117     CeedInt elquadsize = CeedIntPow(Q1d, dim);
118     CeedInt eldofssize = CeedIntPow(P1d, dim);
119 
120     // E-vector ordering -------------- Q-vector ordering
121     //                                  dim
122     //  elem                             component
123     //    component                        elem
124     //       node                            node
125 
126 
127     // ---  Define strides for NOTRANSPOSE mode: ---
128     // Input (u) is E-vector, output (v) is Q-vector
129 
130     // Element strides
131     CeedInt u_elstride = ncomp * eldofssize;
132     CeedInt v_elstride = elquadsize;
133     // Component strides
134     CeedInt u_compstride = eldofssize;
135     CeedInt v_compstride = nelem * elquadsize;
136     // Dimension strides
137     CeedInt u_dimstride = 0;
138     CeedInt v_dimstride = nelem * elquadsize * ncomp;
139 
140     // ---  Swap strides for TRANSPOSE mode: ---
141     if (tmode == CEED_TRANSPOSE) {
142       // Input (u) is Q-vector, output (v) is E-vector
143       // Element strides
144       v_elstride = ncomp * eldofssize;
145       u_elstride = elquadsize;
146       // Component strides
147       v_compstride = eldofssize;
148       u_compstride = nelem * elquadsize;
149       // Dimension strides
150       v_dimstride = 0;
151       u_dimstride = nelem * elquadsize * ncomp;
152 
153     }
154 
155     // Loop through grad dimensions and components, batch call over elements
156     for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++)
157       for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++)
158         magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt,
159             impl->dinterp1d, impl->dgrad1d, tmode,
160             u + dim_ctr * u_dimstride + u_compstride * comp_ctr, u_elstride,
161             v + dim_ctr * v_dimstride + v_compstride * comp_ctr,
162             v_elstride, nelem, dim_ctr);
163   }
164   break;
165   case CEED_EVAL_WEIGHT: {
166     if (tmode == CEED_TRANSPOSE)
167       // LCOV_EXCL_START
168       return CeedError(ceed, 1,
169                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
170     // LCOV_EXCL_STOP
171     CeedInt Q = Q1d;
172     int eldofssize = CeedIntPow(Q, dim);
173     magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d,
174         v, eldofssize,
175         nelem);
176   }
177   break;
178   // LCOV_EXCL_START
179   case CEED_EVAL_DIV:
180     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
181   case CEED_EVAL_CURL:
182     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
183   case CEED_EVAL_NONE:
184     return CeedError(ceed, 1,
185                      "CEED_EVAL_NONE does not make sense in this context");
186     // LCOV_EXCL_STOP
187   }
188 
189   if (emode!=CEED_EVAL_WEIGHT) {
190     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
191   }
192   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
193   return 0;
194 }
195 
196 #ifdef __cplusplus
197 CEED_INTERN "C"
198 #endif
199 int CeedBasisDestroy_Magma(CeedBasis basis) {
200   int ierr;
201   CeedBasis_Magma *impl;
202   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
203 
204   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
205   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
206   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
207   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
208 
209   ierr = CeedFree(&impl); CeedChk(ierr);
210 
211   return 0;
212 }
213 
214 #ifdef __cplusplus
215 CEED_INTERN "C"
216 #endif
217 int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
218                                   const CeedScalar *interp1d,
219                                   const CeedScalar *grad1d,
220                                   const CeedScalar *qref1d,
221                                   const CeedScalar *qweight1d, CeedBasis basis) {
222   int ierr;
223   CeedBasis_Magma *impl;
224   Ceed ceed;
225   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
226 
227   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
228                                 CeedBasisApply_Magma); CeedChk(ierr);
229   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
230                                 CeedBasisDestroy_Magma); CeedChk(ierr);
231 
232   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
233   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
234 
235   // Copy qref1d to the GPU
236   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
237   CeedChk(ierr);
238   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1);
239 
240   // Copy interp1d to the GPU
241   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
242   CeedChk(ierr);
243   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1);
244 
245   // Copy grad1d to the GPU
246   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
247   CeedChk(ierr);
248   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1);
249 
250   // Copy qweight1d to the GPU
251   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
252   CeedChk(ierr);
253   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1);
254 
255   return 0;
256 }
257 
258 #ifdef __cplusplus
259 CEED_INTERN "C"
260 #endif
261 int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
262                             CeedInt nqpts, const CeedScalar *interp,
263                             const CeedScalar *grad, const CeedScalar *qref,
264                             const CeedScalar *qweight, CeedBasis basis) {
265   int ierr;
266   Ceed ceed;
267   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
268 
269   return CeedError(ceed, 1, "Backend does not implement non-tensor bases");
270 }
271