xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 7f5b97318525f14c2c6b9e4eb41d4a37b3a25b45)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-magma.h"
18 
19 #ifdef __cplusplus
20 CEED_INTERN "C"
21 #endif
22 int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
23 			 CeedTransposeMode tmode, CeedEvalMode emode,
24 			 CeedVector U, CeedVector V)
25 {
26   int ierr;
27   Ceed ceed;
28   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
29   CeedInt dim, ncomp, ndof, nqpt;
30   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
31   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
32   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
33   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
34   const CeedScalar *u;
35   CeedScalar *v;
36   if (U) {
37     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
38   } else if (emode != CEED_EVAL_WEIGHT) {
39     // LCOV_EXCL_START
40     return CeedError(ceed, 1,
41                      "An input vector is required for this CeedEvalMode");
42     // LCOV_EXCL_STOP
43   }
44   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
45 
46   CeedBasis_Magma *impl;
47   ierr = CeedBasisGetData(basis, (void*)&impl); CeedChk(ierr);
48 
49   CeedInt P1d, Q1d;
50   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
51   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
52 
53   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
54             ncomp*CeedIntPow(P1d, dim), ncomp);
55 
56   if (tmode == CEED_TRANSPOSE) {
57      CeedInt length;
58      ierr = CeedVectorGetLength(V, &length);
59      magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length );
60   }
61   if (emode & CEED_EVAL_INTERP) {
62     CeedInt P = P1d, Q = Q1d;
63     if (tmode == CEED_TRANSPOSE) {
64       P = Q1d; Q = P1d;
65     }
66 
67     // Define element sizes for dofs/quad
68     CeedInt elquadsize = CeedIntPow(Q1d, dim);
69     CeedInt eldofssize = CeedIntPow(P1d, dim);
70 
71      // E-vector ordering -------------- Q-vector ordering
72      //  elem                            component
73      //    component                        elem
74      //       node                            node
75 
76     // ---  Define strides for NOTRANSPOSE mode: ---
77     // Input (u) is E-vector, output (v) is Q-vector
78 
79     // Element strides
80     CeedInt u_elstride = ncomp * eldofssize;
81     CeedInt v_elstride = elquadsize;
82     // Component strides
83     CeedInt u_compstride = eldofssize;
84     CeedInt v_compstride = nelem * elquadsize;
85 
86     // ---  Swap strides for TRANSPOSE mode: ---
87     if(tmode == CEED_TRANSPOSE) {
88         // Input (u) is Q-vector, output (v) is E-vector
89         // Element strides
90         v_elstride = ncomp * eldofssize;
91         u_elstride = elquadsize;
92         // Component strides
93         v_compstride = eldofssize;
94         u_compstride = nelem * elquadsize;
95     }
96 
97     // Loop through components and apply batch over elements
98     for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++){
99            magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp,
100 	   				       impl->dinterp1d, tmode,
101    					       u + u_compstride * comp_ctr, u_elstride,
102 					       v + v_compstride * comp_ctr, v_elstride,
103 					       nelem);
104      }
105 
106   }
107   if (emode & CEED_EVAL_GRAD) {
108     CeedInt P = P1d, Q = Q1d;
109     // In CEED_NOTRANSPOSE mode:
110     // u is (P^dim x nc), column-major layout (nc = ncomp)
111     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
112     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
113     if (tmode == CEED_TRANSPOSE) {
114       P = Q1d, Q = P1d;
115     }
116 
117     // Define element sizes for dofs/quad
118     CeedInt elquadsize = CeedIntPow(Q1d, dim);
119     CeedInt eldofssize = CeedIntPow(P1d, dim);
120 
121      // E-vector ordering -------------- Q-vector ordering
122      //                                  dim
123      //  elem                             component
124      //    component                        elem
125      //       node                            node
126 
127 
128     // ---  Define strides for NOTRANSPOSE mode: ---
129     // Input (u) is E-vector, output (v) is Q-vector
130 
131     // Element strides
132     CeedInt u_elstride = ncomp * eldofssize;
133     CeedInt v_elstride = elquadsize;
134     // Component strides
135     CeedInt u_compstride = eldofssize;
136     CeedInt v_compstride = nelem * elquadsize;
137     // Dimension strides
138     CeedInt u_dimstride = 0;
139     CeedInt v_dimstride = nelem * elquadsize * ncomp;
140 
141     // ---  Swap strides for TRANSPOSE mode: ---
142     if(tmode == CEED_TRANSPOSE) {
143         // Input (u) is Q-vector, output (v) is E-vector
144         // Element strides
145         v_elstride = ncomp * eldofssize;
146         u_elstride = elquadsize;
147         // Component strides
148         v_compstride = eldofssize;
149         u_compstride = nelem * elquadsize;
150         // Dimension strides
151         v_dimstride = 0;
152         u_dimstride = nelem * elquadsize * ncomp;
153 
154       }
155 
156      // Loop through grad dimensions and components, batch call over elements
157      for(CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++){
158         for (CeedInt comp_ctr = 0; comp_ctr < ncomp; comp_ctr++){
159              magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt,
160 	    				     impl->dinterp1d, impl->dgrad1d, tmode,
161    					     u + dim_ctr * u_dimstride + u_compstride * comp_ctr,
162                                              u_elstride,
163 					     v + dim_ctr * v_dimstride + v_compstride * comp_ctr,
164                                              v_elstride,
165 					     nelem, dim_ctr);
166         }
167      }
168    }
169   if (emode & CEED_EVAL_WEIGHT) {
170     if (tmode == CEED_TRANSPOSE)
171       // LCOV_EXCL_START
172       return CeedError(ceed, 1,
173                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
174     // LCOV_EXCL_STOP
175     CeedInt Q = Q1d;
176     int eldofssize = CeedIntPow(Q, dim);
177     magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d,
178 					       v, eldofssize,
179 					       nelem);
180   }
181 
182   if(emode!=CEED_EVAL_WEIGHT) {
183     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
184   }
185   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
186   return 0;
187 }
188 
189 #ifdef __cplusplus
190 CEED_INTERN "C"
191 #endif
192 int CeedBasisDestroy_Magma(CeedBasis basis)
193 {
194   int ierr;
195   CeedBasis_Magma *impl;
196   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
197 
198   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
199   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
200   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
201   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
202 
203   ierr = CeedFree(&impl); CeedChk(ierr);
204 
205   return 0;
206 }
207 
208 #ifdef __cplusplus
209 CEED_INTERN "C"
210 #endif
211 int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
212     CeedInt Q1d, const CeedScalar *interp1d,
213     const CeedScalar *grad1d,
214     const CeedScalar *qref1d,
215     const CeedScalar *qweight1d,
216     CeedBasis basis)
217 {
218   int ierr;
219   CeedBasis_Magma *impl;
220   Ceed ceed;
221   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
222 
223   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
224                                 CeedBasisApply_Magma); CeedChk(ierr);
225   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
226                                 CeedBasisDestroy_Magma); CeedChk(ierr);
227 
228   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
229   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
230 
231   // Copy qref1d to the GPU
232   ierr = magma_malloc((void**)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
233   CeedChk(ierr);
234   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1);
235 
236   // Copy interp1d to the GPU
237   ierr = magma_malloc((void**)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
238   CeedChk(ierr);
239   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1);
240 
241   // Copy grad1d to the GPU
242   ierr = magma_malloc((void**)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
243   CeedChk(ierr);
244   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1);
245 
246   // Copy qweight1d to the GPU
247   ierr = magma_malloc((void**)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
248   CeedChk(ierr);
249   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1);
250 
251   return 0;
252 }
253 
254 #ifdef __cplusplus
255 CEED_INTERN "C"
256 #endif
257 int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
258                                    CeedInt ndof, CeedInt nqpts,
259                                    const CeedScalar *interp,
260                                    const CeedScalar *grad,
261                                    const CeedScalar *qref,
262                                    const CeedScalar *qweight,
263                                    CeedBasis basis)
264 {
265   int ierr;
266   Ceed ceed;
267   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
268 
269   return CeedError(ceed, 1, "Backend does not implement non-tensor bases");
270 }
271