xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 868539c291cd6e4adc5c1e2f0ea123f6c9e198f6) !
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-magma.h"
18 
19 #ifdef __cplusplus
20 CEED_INTERN "C"
21 #endif
22 int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
23                          CeedTransposeMode tmode, CeedEvalMode emode,
24                          CeedVector U, CeedVector V) {
25   int ierr;
26   Ceed ceed;
27   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
28   CeedInt dim, ncomp, ndof, nqpt;
29   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
30   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
31   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
32   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
33   const CeedScalar *u;
34   CeedScalar *v;
35   if (emode != CEED_EVAL_WEIGHT) {
36     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChk(ierr);
37   } else if (emode != CEED_EVAL_WEIGHT) {
38     // LCOV_EXCL_START
39     return CeedError(ceed, 1,
40                      "An input vector is required for this CeedEvalMode");
41     // LCOV_EXCL_STOP
42   }
43   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChk(ierr);
44 
45   CeedBasis_Magma *impl;
46   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
47 
48   CeedInt P1d, Q1d;
49   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
50   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
51 
52   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
53             ncomp*CeedIntPow(P1d, dim), ncomp);
54 
55   if (tmode == CEED_TRANSPOSE) {
56     CeedInt length;
57     ierr = CeedVectorGetLength(V, &length);
58     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length);
59   }
60   switch (emode) {
61   case CEED_EVAL_INTERP: {
62     CeedInt P = P1d, Q = Q1d;
63     if (tmode == CEED_TRANSPOSE) {
64       P = Q1d; Q = P1d;
65     }
66 
67     // Define element sizes for dofs/quad
68     CeedInt elquadsize = CeedIntPow(Q1d, dim);
69     CeedInt eldofssize = CeedIntPow(P1d, dim);
70 
71     // E-vector ordering -------------- Q-vector ordering
72     //  component                        component
73     //    elem                             elem
74     //       node                            node
75 
76     // ---  Define strides for NOTRANSPOSE mode: ---
77     // Input (u) is E-vector, output (v) is Q-vector
78 
79     // Element strides
80     CeedInt u_elstride = eldofssize;
81     CeedInt v_elstride = elquadsize;
82     // Component strides
83     CeedInt u_compstride = nelem * eldofssize;
84     CeedInt v_compstride = nelem * elquadsize;
85 
86     // ---  Swap strides for TRANSPOSE mode: ---
87     if (tmode == CEED_TRANSPOSE) {
88       // Input (u) is Q-vector, output (v) is E-vector
89       // Element strides
90       v_elstride = eldofssize;
91       u_elstride = elquadsize;
92       // Component strides
93       v_compstride = nelem * eldofssize;
94       u_compstride = nelem * elquadsize;
95     }
96 
97     // Loop through components and apply batch over elements
98     magmablas_dbasis_apply_batched_eval_interp(P, Q, dim, ncomp,
99         impl->dinterp1d, tmode,
100         u, u_elstride, u_compstride,
101         v, v_elstride, v_compstride,
102         nelem);
103   }
104   break;
105   case CEED_EVAL_GRAD: {
106     CeedInt P = P1d, Q = Q1d;
107     // In CEED_NOTRANSPOSE mode:
108     // u is (P^dim x nc), column-major layout (nc = ncomp)
109     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
110     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
111     if (tmode == CEED_TRANSPOSE) {
112       P = Q1d, Q = P1d;
113     }
114 
115     // Define element sizes for dofs/quad
116     CeedInt elquadsize = CeedIntPow(Q1d, dim);
117     CeedInt eldofssize = CeedIntPow(P1d, dim);
118 
119     // E-vector ordering -------------- Q-vector ordering
120     //                                  dim
121     //  component                        component
122     //    elem                              elem
123     //       node                            node
124 
125 
126     // ---  Define strides for NOTRANSPOSE mode: ---
127     // Input (u) is E-vector, output (v) is Q-vector
128 
129     // Element strides
130     CeedInt u_elstride = eldofssize;
131     CeedInt v_elstride = elquadsize;
132     // Component strides
133     CeedInt u_compstride = nelem * eldofssize;
134     CeedInt v_compstride = nelem * elquadsize;
135     // Dimension strides
136     CeedInt u_dimstride = 0;
137     CeedInt v_dimstride = nelem * elquadsize * ncomp;
138 
139     // ---  Swap strides for TRANSPOSE mode: ---
140     if (tmode == CEED_TRANSPOSE) {
141       // Input (u) is Q-vector, output (v) is E-vector
142       // Element strides
143       v_elstride = eldofssize;
144       u_elstride = elquadsize;
145       // Component strides
146       v_compstride = nelem * eldofssize;
147       u_compstride = nelem * elquadsize;
148       // Dimension strides
149       v_dimstride = 0;
150       u_dimstride = nelem * elquadsize * ncomp;
151 
152     }
153 
154     // Loop through grad dimensions only, batch call over elements and components
155     for (CeedInt dim_ctr = 0; dim_ctr < dim; dim_ctr++)
156       magmablas_dbasis_apply_batched_eval_grad(P, Q, dim, ncomp, nqpt,
157           impl->dinterp1d, impl->dgrad1d, tmode,
158           u + dim_ctr * u_dimstride, u_elstride, u_compstride, u_dimstride,
159           v + dim_ctr * v_dimstride, v_elstride, v_compstride, v_dimstride,
160           dim_ctr, nelem);
161   }
162   break;
163   case CEED_EVAL_WEIGHT: {
164     if (tmode == CEED_TRANSPOSE)
165       // LCOV_EXCL_START
166       return CeedError(ceed, 1,
167                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
168     // LCOV_EXCL_STOP
169     CeedInt Q = Q1d;
170     int eldofssize = CeedIntPow(Q, dim);
171     magmablas_dbasis_apply_batched_eval_weight(Q, dim, impl->dqweight1d,
172         v, eldofssize,
173         nelem);
174   }
175   break;
176   // LCOV_EXCL_START
177   case CEED_EVAL_DIV:
178     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
179   case CEED_EVAL_CURL:
180     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
181   case CEED_EVAL_NONE:
182     return CeedError(ceed, 1,
183                      "CEED_EVAL_NONE does not make sense in this context");
184     // LCOV_EXCL_STOP
185   }
186 
187   if (emode!=CEED_EVAL_WEIGHT) {
188     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr);
189   }
190   ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr);
191   return 0;
192 }
193 
194 #ifdef __cplusplus
195 CEED_INTERN "C"
196 #endif
197 int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem,
198                                   CeedTransposeMode tmode, CeedEvalMode emode,
199                                   CeedVector U, CeedVector V) {
200   int ierr;
201   Ceed ceed;
202   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
203   CeedInt dim, ncomp, ndof, nqpt;
204   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
205   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
206   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr);
207   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr);
208   const CeedScalar *du;
209   CeedScalar *dv;
210   if (emode != CEED_EVAL_WEIGHT) {
211     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChk(ierr);
212   } else if (emode != CEED_EVAL_WEIGHT) {
213     // LCOV_EXCL_START
214     return CeedError(ceed, 1,
215                      "An input vector is required for this CeedEvalMode");
216     // LCOV_EXCL_STOP
217   }
218   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChk(ierr);
219 
220   CeedBasisNonTensor_Magma *impl;
221   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
222 
223   CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
224             ncomp*ndof, ncomp);
225 
226   if (tmode == CEED_TRANSPOSE) {
227     CeedInt length;
228     ierr = CeedVectorGetLength(V, &length);
229     magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length);
230   }
231   switch (emode) {
232   case CEED_EVAL_INTERP: {
233     CeedInt P = ndof, Q = nqpt;
234     if (tmode == CEED_TRANSPOSE)
235       magma_dgemm(MagmaNoTrans, MagmaNoTrans,
236                   P, nelem*ncomp, Q,
237                   1.0, impl->dinterp, P,
238                   du, Q,
239                   0.0, dv, P);
240     else
241       magma_dgemm(MagmaTrans, MagmaNoTrans,
242                   Q, nelem*ncomp, P,
243                   1.0, impl->dinterp, P,
244                   du, P,
245                   0.0, dv, Q);
246   }
247   break;
248 
249   case CEED_EVAL_GRAD: {
250     CeedInt P = ndof, Q = nqpt;
251     if (tmode == CEED_TRANSPOSE) {
252       double beta = 0.0;
253       for(int d=0; d<dim; d++) {
254         if (d>0)
255           beta = 1.0;
256         magma_dgemm(MagmaNoTrans, MagmaNoTrans,
257                     P, nelem*ncomp, Q,
258                     1.0, impl->dgrad + d*P*Q, P,
259                     du + d*nelem*ncomp*Q, Q,
260                     beta, dv, P);
261       }
262     } else {
263       for(int d=0; d< dim; d++)
264         magma_dgemm(MagmaTrans, MagmaNoTrans,
265                     Q, nelem*ncomp, P,
266                     1.0, impl->dgrad + d*P*Q, P,
267                     du, P,
268                     0.0, dv + d*nelem*ncomp*Q, Q);
269     }
270   }
271   break;
272 
273   case CEED_EVAL_WEIGHT: {
274     if (tmode == CEED_TRANSPOSE)
275       // LCOV_EXCL_START
276       return CeedError(ceed, 1,
277                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
278     // LCOV_EXCL_STOP
279 
280     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
281     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
282                                        1 : 0 );
283     magma_weight(grid, nqpt, nelem, nqpt, impl->dqweight, dv);
284     CeedChk(ierr);
285   }
286   break;
287 
288   // LCOV_EXCL_START
289   case CEED_EVAL_DIV:
290     return CeedError(ceed, 1, "CEED_EVAL_DIV not supported");
291   case CEED_EVAL_CURL:
292     return CeedError(ceed, 1, "CEED_EVAL_CURL not supported");
293   case CEED_EVAL_NONE:
294     return CeedError(ceed, 1,
295                      "CEED_EVAL_NONE does not make sense in this context");
296     // LCOV_EXCL_STOP
297   }
298 
299   if(emode!=CEED_EVAL_WEIGHT) {
300     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChk(ierr);
301   }
302   ierr = CeedVectorRestoreArray(V, &dv); CeedChk(ierr);
303   return 0;
304 }
305 
306 #ifdef __cplusplus
307 CEED_INTERN "C"
308 #endif
309 int CeedBasisDestroy_Magma(CeedBasis basis) {
310   int ierr;
311   CeedBasis_Magma *impl;
312   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
313 
314   ierr = magma_free(impl->dqref1d); CeedChk(ierr);
315   ierr = magma_free(impl->dinterp1d); CeedChk(ierr);
316   ierr = magma_free(impl->dgrad1d); CeedChk(ierr);
317   ierr = magma_free(impl->dqweight1d); CeedChk(ierr);
318 
319   ierr = CeedFree(&impl); CeedChk(ierr);
320 
321   return 0;
322 }
323 
324 #ifdef __cplusplus
325 CEED_INTERN "C"
326 #endif
327 int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
328   int ierr;
329   CeedBasisNonTensor_Magma *impl;
330   ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr);
331 
332   ierr = magma_free(impl->dqref); CeedChk(ierr);
333   ierr = magma_free(impl->dinterp); CeedChk(ierr);
334   ierr = magma_free(impl->dgrad); CeedChk(ierr);
335   ierr = magma_free(impl->dqweight); CeedChk(ierr);
336 
337   ierr = CeedFree(&impl); CeedChk(ierr);
338 
339   return 0;
340 }
341 
342 #ifdef __cplusplus
343 CEED_INTERN "C"
344 #endif
345 int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
346                                   const CeedScalar *interp1d,
347                                   const CeedScalar *grad1d,
348                                   const CeedScalar *qref1d,
349                                   const CeedScalar *qweight1d, CeedBasis basis) {
350   int ierr;
351   CeedBasis_Magma *impl;
352   Ceed ceed;
353   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
354 
355   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
356                                 CeedBasisApply_Magma); CeedChk(ierr);
357   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
358                                 CeedBasisDestroy_Magma); CeedChk(ierr);
359 
360   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
361   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
362 
363   // Copy qref1d to the GPU
364   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
365   CeedChk(ierr);
366   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1);
367 
368   // Copy interp1d to the GPU
369   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
370   CeedChk(ierr);
371   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1);
372 
373   // Copy grad1d to the GPU
374   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
375   CeedChk(ierr);
376   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1);
377 
378   // Copy qweight1d to the GPU
379   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
380   CeedChk(ierr);
381   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1);
382 
383   return 0;
384 }
385 
386 #ifdef __cplusplus
387 CEED_INTERN "C"
388 #endif
389 int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
390                             CeedInt nqpts, const CeedScalar *interp,
391                             const CeedScalar *grad, const CeedScalar *qref,
392                             const CeedScalar *qweight, CeedBasis basis) {
393   int ierr;
394   CeedBasisNonTensor_Magma *impl;
395   Ceed ceed;
396   ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
397 
398   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
399                                 CeedBasisApplyNonTensor_Magma); CeedChk(ierr);
400   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
401                                 CeedBasisDestroyNonTensor_Magma); CeedChk(ierr);
402 
403   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
404   ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr);
405 
406   // Copy qref to the GPU
407   ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0]));
408   CeedChk(ierr);
409   magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1);
410 
411   // Copy interp to the GPU
412   ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0]));
413   CeedChk(ierr);
414   magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1);
415 
416   // Copy grad to the GPU
417   ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0]));
418   CeedChk(ierr);
419   magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1);
420 
421   // Copy qweight to the GPU
422   ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0]));
423   CeedChk(ierr);
424   magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1);
425 
426   return 0;
427 }
428