xref: /libCEED/rust/libceed-sys/c-src/backends/hip-gen/ceed-hip-gen-operator.c (revision e15f9bd09af0280c89b79924fa9af7dd2e3e30be)
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167d8d0e25Snbeams 
173d576824SJeremy L Thompson #include <ceed.h>
183d576824SJeremy L Thompson #include <ceed-backend.h>
193d576824SJeremy L Thompson #include <stddef.h>
207d8d0e25Snbeams #include "ceed-hip-gen.h"
217d8d0e25Snbeams #include "ceed-hip-gen-operator-build.h"
227d8d0e25Snbeams #include "../hip/ceed-hip-compile.h"
237d8d0e25Snbeams 
247d8d0e25Snbeams //------------------------------------------------------------------------------
257d8d0e25Snbeams // Destroy operator
267d8d0e25Snbeams //------------------------------------------------------------------------------
277d8d0e25Snbeams static int CeedOperatorDestroy_Hip_gen(CeedOperator op) {
287d8d0e25Snbeams   int ierr;
297d8d0e25Snbeams   CeedOperator_Hip_gen *impl;
30*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
31*e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
32*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
337d8d0e25Snbeams }
347d8d0e25Snbeams 
357d8d0e25Snbeams //------------------------------------------------------------------------------
367d8d0e25Snbeams // Apply and add to output
377d8d0e25Snbeams //------------------------------------------------------------------------------
387d8d0e25Snbeams static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector invec,
397d8d0e25Snbeams                                         CeedVector outvec, CeedRequest *request) {
407d8d0e25Snbeams   int ierr;
417d8d0e25Snbeams   Ceed ceed;
42*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
437d8d0e25Snbeams   CeedOperator_Hip_gen *data;
44*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &data); CeedChkBackend(ierr);
457d8d0e25Snbeams   CeedQFunction qf;
467d8d0e25Snbeams   CeedQFunction_Hip_gen *qf_data;
47*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
48*e15f9bd0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, &qf_data); CeedChkBackend(ierr);
497d8d0e25Snbeams   CeedInt nelem, numinputfields, numoutputfields;
50*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumElements(op, &nelem); CeedChkBackend(ierr);
517d8d0e25Snbeams   ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
52*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
537d8d0e25Snbeams   CeedOperatorField *opinputfields, *opoutputfields;
547d8d0e25Snbeams   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
55*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
567d8d0e25Snbeams   CeedQFunctionField *qfinputfields, *qfoutputfields;
577d8d0e25Snbeams   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
58*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
597d8d0e25Snbeams   CeedEvalMode emode;
607d8d0e25Snbeams   CeedVector vec, outvecs[16] = {};
617d8d0e25Snbeams 
627d8d0e25Snbeams   //Creation of the operator
63*e15f9bd0SJeremy L Thompson   ierr = CeedHipGenOperatorBuild(op); CeedChkBackend(ierr);
647d8d0e25Snbeams 
657d8d0e25Snbeams   // Input vectors
667d8d0e25Snbeams   for (CeedInt i = 0; i < numinputfields; i++) {
677d8d0e25Snbeams     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
68*e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
697d8d0e25Snbeams     if (emode == CEED_EVAL_WEIGHT) { // Skip
707d8d0e25Snbeams       data->fields.in[i] = NULL;
717d8d0e25Snbeams     } else {
727d8d0e25Snbeams       // Get input vector
73*e15f9bd0SJeremy L Thompson       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
747d8d0e25Snbeams       if (vec == CEED_VECTOR_ACTIVE) vec = invec;
757d8d0e25Snbeams       ierr = CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.in[i]);
76*e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
777d8d0e25Snbeams     }
787d8d0e25Snbeams   }
797d8d0e25Snbeams 
807d8d0e25Snbeams   // Output vectors
817d8d0e25Snbeams   for (CeedInt i = 0; i < numoutputfields; i++) {
827d8d0e25Snbeams     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
83*e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
847d8d0e25Snbeams     if (emode == CEED_EVAL_WEIGHT) { // Skip
857d8d0e25Snbeams       data->fields.out[i] = NULL;
867d8d0e25Snbeams     } else {
877d8d0e25Snbeams       // Get output vector
88*e15f9bd0SJeremy L Thompson       ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec);
89*e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
907d8d0e25Snbeams       if (vec == CEED_VECTOR_ACTIVE) vec = outvec;
917d8d0e25Snbeams       outvecs[i] = vec;
927d8d0e25Snbeams       // Check for multiple output modes
937d8d0e25Snbeams       CeedInt index = -1;
947d8d0e25Snbeams       for (CeedInt j = 0; j < i; j++) {
957d8d0e25Snbeams         if (vec == outvecs[j]) {
967d8d0e25Snbeams           index = j;
977d8d0e25Snbeams           break;
987d8d0e25Snbeams         }
997d8d0e25Snbeams       }
1007d8d0e25Snbeams       if (index == -1) {
1017d8d0e25Snbeams         ierr = CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.out[i]);
102*e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
1037d8d0e25Snbeams       } else {
1047d8d0e25Snbeams         data->fields.out[i] = data->fields.out[index];
1057d8d0e25Snbeams       }
1067d8d0e25Snbeams     }
1077d8d0e25Snbeams   }
1087d8d0e25Snbeams 
1097d8d0e25Snbeams   // Get context data
1107d8d0e25Snbeams   CeedQFunctionContext ctx;
111*e15f9bd0SJeremy L Thompson   ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr);
1127d8d0e25Snbeams   if (ctx) {
1137d8d0e25Snbeams     ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &qf_data->d_c);
114*e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1157d8d0e25Snbeams   }
1167d8d0e25Snbeams 
1177d8d0e25Snbeams   // Apply operator
1187d8d0e25Snbeams   void *opargs[] = {(void *) &nelem, &qf_data->d_c, &data->indices,
1197d8d0e25Snbeams                     &data->fields, &data->B, &data->G, &data->W
1207d8d0e25Snbeams                    };
1217d8d0e25Snbeams   const CeedInt dim = data->dim;
1227d8d0e25Snbeams   const CeedInt Q1d = data->Q1d;
1237d8d0e25Snbeams   const CeedInt P1d = data->maxP1d;
1247d8d0e25Snbeams   const CeedInt thread1d = CeedIntMax(Q1d, P1d);
1257d8d0e25Snbeams   if (dim==1) {
126e7ea6884Snbeams     CeedInt elemsPerBlock = 64*thread1d > 256? 256/thread1d : 64;
1277d8d0e25Snbeams     elemsPerBlock = elemsPerBlock>0?elemsPerBlock:1;
1287d8d0e25Snbeams     CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)
1297d8d0e25Snbeams                                            ? 1 : 0 );
1307d8d0e25Snbeams     CeedInt sharedMem = elemsPerBlock*thread1d*sizeof(CeedScalar);
1317d8d0e25Snbeams     ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, 1,
1327d8d0e25Snbeams                                      elemsPerBlock, sharedMem, opargs);
1337d8d0e25Snbeams   } else if (dim==2) {
1347d8d0e25Snbeams     const CeedInt elemsPerBlock = thread1d<4? 16 : 2;
1357d8d0e25Snbeams     CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)
1367d8d0e25Snbeams                                            ? 1 : 0 );
1377d8d0e25Snbeams     CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar);
1387d8d0e25Snbeams     ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, thread1d,
1397d8d0e25Snbeams                                      elemsPerBlock, sharedMem, opargs);
1407d8d0e25Snbeams   } else if (dim==3) {
1417d8d0e25Snbeams     const CeedInt elemsPerBlock = thread1d<6? 4 : (thread1d<8? 2 : 1);
1427d8d0e25Snbeams     CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)
1437d8d0e25Snbeams                                            ? 1 : 0 );
1447d8d0e25Snbeams     CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar);
1457d8d0e25Snbeams     ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, thread1d,
1467d8d0e25Snbeams                                      elemsPerBlock, sharedMem, opargs);
1477d8d0e25Snbeams   }
148*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
1497d8d0e25Snbeams 
1507d8d0e25Snbeams   // Restore input arrays
1517d8d0e25Snbeams   for (CeedInt i = 0; i < numinputfields; i++) {
1527d8d0e25Snbeams     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
153*e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1547d8d0e25Snbeams     if (emode == CEED_EVAL_WEIGHT) { // Skip
1557d8d0e25Snbeams     } else {
156*e15f9bd0SJeremy L Thompson       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
1577d8d0e25Snbeams       if (vec == CEED_VECTOR_ACTIVE) vec = invec;
1587d8d0e25Snbeams       ierr = CeedVectorRestoreArrayRead(vec, &data->fields.in[i]);
159*e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
1607d8d0e25Snbeams     }
1617d8d0e25Snbeams   }
1627d8d0e25Snbeams 
1637d8d0e25Snbeams   // Restore output arrays
1647d8d0e25Snbeams   for (CeedInt i = 0; i < numoutputfields; i++) {
1657d8d0e25Snbeams     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
166*e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1677d8d0e25Snbeams     if (emode == CEED_EVAL_WEIGHT) { // Skip
1687d8d0e25Snbeams     } else {
169*e15f9bd0SJeremy L Thompson       ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec);
170*e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
1717d8d0e25Snbeams       if (vec == CEED_VECTOR_ACTIVE) vec = outvec;
1727d8d0e25Snbeams       // Check for multiple output modes
1737d8d0e25Snbeams       CeedInt index = -1;
1747d8d0e25Snbeams       for (CeedInt j = 0; j < i; j++) {
1757d8d0e25Snbeams         if (vec == outvecs[j]) {
1767d8d0e25Snbeams           index = j;
1777d8d0e25Snbeams           break;
1787d8d0e25Snbeams         }
1797d8d0e25Snbeams       }
1807d8d0e25Snbeams       if (index == -1) {
1817d8d0e25Snbeams         ierr = CeedVectorRestoreArray(vec, &data->fields.out[i]);
182*e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
1837d8d0e25Snbeams       }
1847d8d0e25Snbeams     }
1857d8d0e25Snbeams   }
1867d8d0e25Snbeams 
1877d8d0e25Snbeams   // Restore context data
1887d8d0e25Snbeams   if (ctx) {
1897d8d0e25Snbeams     ierr = CeedQFunctionContextRestoreData(ctx, &qf_data->d_c);
190*e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
1917d8d0e25Snbeams   }
192*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1937d8d0e25Snbeams }
1947d8d0e25Snbeams 
1957d8d0e25Snbeams //------------------------------------------------------------------------------
1967d8d0e25Snbeams // Create FDM element inverse not supported
1977d8d0e25Snbeams //------------------------------------------------------------------------------
1987d8d0e25Snbeams static int CeedOperatorCreateFDMElementInverse_Hip(CeedOperator op) {
1997d8d0e25Snbeams   // LCOV_EXCL_START
2007d8d0e25Snbeams   int ierr;
2017d8d0e25Snbeams   Ceed ceed;
202*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
203*e15f9bd0SJeremy L Thompson   return CeedError(ceed, CEED_ERROR_BACKEND,
204*e15f9bd0SJeremy L Thompson                    "Backend does not implement FDM inverse creation");
2057d8d0e25Snbeams   // LCOV_EXCL_STOP
2067d8d0e25Snbeams }
2077d8d0e25Snbeams 
2087d8d0e25Snbeams //------------------------------------------------------------------------------
2097d8d0e25Snbeams // Create operator
2107d8d0e25Snbeams //------------------------------------------------------------------------------
2117d8d0e25Snbeams int CeedOperatorCreate_Hip_gen(CeedOperator op) {
2127d8d0e25Snbeams   int ierr;
2137d8d0e25Snbeams   Ceed ceed;
214*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
2157d8d0e25Snbeams   CeedOperator_Hip_gen *impl;
2167d8d0e25Snbeams 
217*e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
218*e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
2197d8d0e25Snbeams 
2207d8d0e25Snbeams   ierr = CeedSetBackendFunction(ceed, "Operator", op, "CreateFDMElementInverse",
2217d8d0e25Snbeams                                 CeedOperatorCreateFDMElementInverse_Hip);
222*e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
2237d8d0e25Snbeams   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
224*e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Hip_gen); CeedChkBackend(ierr);
2257d8d0e25Snbeams   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
226*e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Hip_gen); CeedChkBackend(ierr);
227*e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2287d8d0e25Snbeams }
2297d8d0e25Snbeams //------------------------------------------------------------------------------
230