// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
167d8d0e25Snbeams 173d576824SJeremy L Thompson #include <ceed.h> 183d576824SJeremy L Thompson #include <ceed-backend.h> 193d576824SJeremy L Thompson #include <stddef.h> 207d8d0e25Snbeams #include "ceed-hip-gen.h" 217d8d0e25Snbeams #include "ceed-hip-gen-operator-build.h" 227d8d0e25Snbeams #include "../hip/ceed-hip-compile.h" 237d8d0e25Snbeams 247d8d0e25Snbeams //------------------------------------------------------------------------------ 257d8d0e25Snbeams // Destroy operator 267d8d0e25Snbeams //------------------------------------------------------------------------------ 277d8d0e25Snbeams static int CeedOperatorDestroy_Hip_gen(CeedOperator op) { 287d8d0e25Snbeams int ierr; 297d8d0e25Snbeams CeedOperator_Hip_gen *impl; 30*e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr); 31*e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 32*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 337d8d0e25Snbeams } 347d8d0e25Snbeams 357d8d0e25Snbeams //------------------------------------------------------------------------------ 367d8d0e25Snbeams // Apply and add to output 377d8d0e25Snbeams //------------------------------------------------------------------------------ 387d8d0e25Snbeams static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector invec, 397d8d0e25Snbeams CeedVector outvec, CeedRequest *request) { 407d8d0e25Snbeams int ierr; 417d8d0e25Snbeams Ceed ceed; 42*e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 437d8d0e25Snbeams CeedOperator_Hip_gen *data; 44*e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetData(op, &data); CeedChkBackend(ierr); 457d8d0e25Snbeams CeedQFunction qf; 467d8d0e25Snbeams CeedQFunction_Hip_gen *qf_data; 47*e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr); 48*e15f9bd0SJeremy L Thompson ierr = CeedQFunctionGetData(qf, &qf_data); CeedChkBackend(ierr); 497d8d0e25Snbeams CeedInt nelem, numinputfields, 
numoutputfields; 50*e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetNumElements(op, &nelem); CeedChkBackend(ierr); 517d8d0e25Snbeams ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); 52*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 537d8d0e25Snbeams CeedOperatorField *opinputfields, *opoutputfields; 547d8d0e25Snbeams ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields); 55*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 567d8d0e25Snbeams CeedQFunctionField *qfinputfields, *qfoutputfields; 577d8d0e25Snbeams ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields); 58*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 597d8d0e25Snbeams CeedEvalMode emode; 607d8d0e25Snbeams CeedVector vec, outvecs[16] = {}; 617d8d0e25Snbeams 627d8d0e25Snbeams //Creation of the operator 63*e15f9bd0SJeremy L Thompson ierr = CeedHipGenOperatorBuild(op); CeedChkBackend(ierr); 647d8d0e25Snbeams 657d8d0e25Snbeams // Input vectors 667d8d0e25Snbeams for (CeedInt i = 0; i < numinputfields; i++) { 677d8d0e25Snbeams ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 68*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 697d8d0e25Snbeams if (emode == CEED_EVAL_WEIGHT) { // Skip 707d8d0e25Snbeams data->fields.in[i] = NULL; 717d8d0e25Snbeams } else { 727d8d0e25Snbeams // Get input vector 73*e15f9bd0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 747d8d0e25Snbeams if (vec == CEED_VECTOR_ACTIVE) vec = invec; 757d8d0e25Snbeams ierr = CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &data->fields.in[i]); 76*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 777d8d0e25Snbeams } 787d8d0e25Snbeams } 797d8d0e25Snbeams 807d8d0e25Snbeams // Output vectors 817d8d0e25Snbeams for (CeedInt i = 0; i < numoutputfields; i++) { 827d8d0e25Snbeams ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 83*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 847d8d0e25Snbeams if (emode == CEED_EVAL_WEIGHT) { // 
Skip 857d8d0e25Snbeams data->fields.out[i] = NULL; 867d8d0e25Snbeams } else { 877d8d0e25Snbeams // Get output vector 88*e15f9bd0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 89*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 907d8d0e25Snbeams if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 917d8d0e25Snbeams outvecs[i] = vec; 927d8d0e25Snbeams // Check for multiple output modes 937d8d0e25Snbeams CeedInt index = -1; 947d8d0e25Snbeams for (CeedInt j = 0; j < i; j++) { 957d8d0e25Snbeams if (vec == outvecs[j]) { 967d8d0e25Snbeams index = j; 977d8d0e25Snbeams break; 987d8d0e25Snbeams } 997d8d0e25Snbeams } 1007d8d0e25Snbeams if (index == -1) { 1017d8d0e25Snbeams ierr = CeedVectorGetArray(vec, CEED_MEM_DEVICE, &data->fields.out[i]); 102*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1037d8d0e25Snbeams } else { 1047d8d0e25Snbeams data->fields.out[i] = data->fields.out[index]; 1057d8d0e25Snbeams } 1067d8d0e25Snbeams } 1077d8d0e25Snbeams } 1087d8d0e25Snbeams 1097d8d0e25Snbeams // Get context data 1107d8d0e25Snbeams CeedQFunctionContext ctx; 111*e15f9bd0SJeremy L Thompson ierr = CeedQFunctionGetInnerContext(qf, &ctx); CeedChkBackend(ierr); 1127d8d0e25Snbeams if (ctx) { 1137d8d0e25Snbeams ierr = CeedQFunctionContextGetData(ctx, CEED_MEM_DEVICE, &qf_data->d_c); 114*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1157d8d0e25Snbeams } 1167d8d0e25Snbeams 1177d8d0e25Snbeams // Apply operator 1187d8d0e25Snbeams void *opargs[] = {(void *) &nelem, &qf_data->d_c, &data->indices, 1197d8d0e25Snbeams &data->fields, &data->B, &data->G, &data->W 1207d8d0e25Snbeams }; 1217d8d0e25Snbeams const CeedInt dim = data->dim; 1227d8d0e25Snbeams const CeedInt Q1d = data->Q1d; 1237d8d0e25Snbeams const CeedInt P1d = data->maxP1d; 1247d8d0e25Snbeams const CeedInt thread1d = CeedIntMax(Q1d, P1d); 1257d8d0e25Snbeams if (dim==1) { 126e7ea6884Snbeams CeedInt elemsPerBlock = 64*thread1d > 256? 
256/thread1d : 64; 1277d8d0e25Snbeams elemsPerBlock = elemsPerBlock>0?elemsPerBlock:1; 1287d8d0e25Snbeams CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 1297d8d0e25Snbeams ? 1 : 0 ); 1307d8d0e25Snbeams CeedInt sharedMem = elemsPerBlock*thread1d*sizeof(CeedScalar); 1317d8d0e25Snbeams ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, 1, 1327d8d0e25Snbeams elemsPerBlock, sharedMem, opargs); 1337d8d0e25Snbeams } else if (dim==2) { 1347d8d0e25Snbeams const CeedInt elemsPerBlock = thread1d<4? 16 : 2; 1357d8d0e25Snbeams CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 1367d8d0e25Snbeams ? 1 : 0 ); 1377d8d0e25Snbeams CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar); 1387d8d0e25Snbeams ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, thread1d, 1397d8d0e25Snbeams elemsPerBlock, sharedMem, opargs); 1407d8d0e25Snbeams } else if (dim==3) { 1417d8d0e25Snbeams const CeedInt elemsPerBlock = thread1d<6? 4 : (thread1d<8? 2 : 1); 1427d8d0e25Snbeams CeedInt grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem) 1437d8d0e25Snbeams ? 
1 : 0 ); 1447d8d0e25Snbeams CeedInt sharedMem = elemsPerBlock*thread1d*thread1d*sizeof(CeedScalar); 1457d8d0e25Snbeams ierr = CeedRunKernelDimSharedHip(ceed, data->op, grid, thread1d, thread1d, 1467d8d0e25Snbeams elemsPerBlock, sharedMem, opargs); 1477d8d0e25Snbeams } 148*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1497d8d0e25Snbeams 1507d8d0e25Snbeams // Restore input arrays 1517d8d0e25Snbeams for (CeedInt i = 0; i < numinputfields; i++) { 1527d8d0e25Snbeams ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode); 153*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1547d8d0e25Snbeams if (emode == CEED_EVAL_WEIGHT) { // Skip 1557d8d0e25Snbeams } else { 156*e15f9bd0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr); 1577d8d0e25Snbeams if (vec == CEED_VECTOR_ACTIVE) vec = invec; 1587d8d0e25Snbeams ierr = CeedVectorRestoreArrayRead(vec, &data->fields.in[i]); 159*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1607d8d0e25Snbeams } 1617d8d0e25Snbeams } 1627d8d0e25Snbeams 1637d8d0e25Snbeams // Restore output arrays 1647d8d0e25Snbeams for (CeedInt i = 0; i < numoutputfields; i++) { 1657d8d0e25Snbeams ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode); 166*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1677d8d0e25Snbeams if (emode == CEED_EVAL_WEIGHT) { // Skip 1687d8d0e25Snbeams } else { 169*e15f9bd0SJeremy L Thompson ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); 170*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1717d8d0e25Snbeams if (vec == CEED_VECTOR_ACTIVE) vec = outvec; 1727d8d0e25Snbeams // Check for multiple output modes 1737d8d0e25Snbeams CeedInt index = -1; 1747d8d0e25Snbeams for (CeedInt j = 0; j < i; j++) { 1757d8d0e25Snbeams if (vec == outvecs[j]) { 1767d8d0e25Snbeams index = j; 1777d8d0e25Snbeams break; 1787d8d0e25Snbeams } 1797d8d0e25Snbeams } 1807d8d0e25Snbeams if (index == -1) { 1817d8d0e25Snbeams ierr = CeedVectorRestoreArray(vec, &data->fields.out[i]); 
182*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1837d8d0e25Snbeams } 1847d8d0e25Snbeams } 1857d8d0e25Snbeams } 1867d8d0e25Snbeams 1877d8d0e25Snbeams // Restore context data 1887d8d0e25Snbeams if (ctx) { 1897d8d0e25Snbeams ierr = CeedQFunctionContextRestoreData(ctx, &qf_data->d_c); 190*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 1917d8d0e25Snbeams } 192*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1937d8d0e25Snbeams } 1947d8d0e25Snbeams 1957d8d0e25Snbeams //------------------------------------------------------------------------------ 1967d8d0e25Snbeams // Create FDM element inverse not supported 1977d8d0e25Snbeams //------------------------------------------------------------------------------ 1987d8d0e25Snbeams static int CeedOperatorCreateFDMElementInverse_Hip(CeedOperator op) { 1997d8d0e25Snbeams // LCOV_EXCL_START 2007d8d0e25Snbeams int ierr; 2017d8d0e25Snbeams Ceed ceed; 202*e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 203*e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 204*e15f9bd0SJeremy L Thompson "Backend does not implement FDM inverse creation"); 2057d8d0e25Snbeams // LCOV_EXCL_STOP 2067d8d0e25Snbeams } 2077d8d0e25Snbeams 2087d8d0e25Snbeams //------------------------------------------------------------------------------ 2097d8d0e25Snbeams // Create operator 2107d8d0e25Snbeams //------------------------------------------------------------------------------ 2117d8d0e25Snbeams int CeedOperatorCreate_Hip_gen(CeedOperator op) { 2127d8d0e25Snbeams int ierr; 2137d8d0e25Snbeams Ceed ceed; 214*e15f9bd0SJeremy L Thompson ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr); 2157d8d0e25Snbeams CeedOperator_Hip_gen *impl; 2167d8d0e25Snbeams 217*e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 218*e15f9bd0SJeremy L Thompson ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr); 2197d8d0e25Snbeams 2207d8d0e25Snbeams ierr = 
CeedSetBackendFunction(ceed, "Operator", op, "CreateFDMElementInverse", 2217d8d0e25Snbeams CeedOperatorCreateFDMElementInverse_Hip); 222*e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 2237d8d0e25Snbeams ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd", 224*e15f9bd0SJeremy L Thompson CeedOperatorApplyAdd_Hip_gen); CeedChkBackend(ierr); 2257d8d0e25Snbeams ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy", 226*e15f9bd0SJeremy L Thompson CeedOperatorDestroy_Hip_gen); CeedChkBackend(ierr); 227*e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2287d8d0e25Snbeams } 2297d8d0e25Snbeams //------------------------------------------------------------------------------ 230