Home
last modified time | relevance | path

Searched full:arch (Results 1 – 9 of 9) sorted by relevance

/libCEED/.github/workflows/
H A Dc-fortan-test-ppc64le.yml15 arch: [ppc64le]
24 uses: uraimo/run-on-arch-action@v3
30 arch: ${{ matrix.arch }}
/libCEED/backends/magma/tuning/
H A DREADME.md8 header files called `<ARCH>_rtc.h`, where `<ARCH>` is the GPU name, as well as a
18 python generate_tuning.py -arch a100 -max-nb 32 -build-cmd "make" -ceed "/gpu/cuda/magma"
H A Dgenerate_tuning.py78 "-arch",
123 with open(f"{script_dir}/{args.arch}_rtc.h", "w") as f:
126 f.write(f"// auto-generated from data on {args.arch}\n\n")
133 f"std::vector<std::array<int, RECORD_LENGTH_RTC> > drtc_t_{args.arch}" +
147 f"std::vector<std::array<int, RECORD_LENGTH_RTC> > drtc_n_{args.arch}" +
/libCEED/backends/sycl/
H A Donline_compiler.hpp94 …online_compiler(sycl::info::device_type dev_type, device_arch arch, compiled_code_format fmt = com… in online_compiler() argument
95 …: OutputFormat(fmt), OutputFormatVersion({0, 0}), DeviceType(dev_type), DeviceArch(arch), Is64Bit(… in online_compiler()
138 online_compiler<Lang> &setTargetDeviceArch(device_arch arch) { in setTargetDeviceArch() argument
139 DeviceArch = arch; in setTargetDeviceArch()
/libCEED/backends/magma/
H A Dceed-magma-gemm-nontensor.cpp87 magma_int_t arch = magma_getdevice_arch(); in magma_gemm_nontensor() local
101 gemm_selector(arch, precision, trans, m, n, k, &nbatch, &use_magmablas); in magma_gemm_nontensor()
H A Dceed-magma-basis.c324 magma_int_t arch = magma_getdevice_arch(); in CeedBasisApplyNonTensorCore_Magma() local
340 impl->NB_interp[iN] = nontensor_rtc_get_nb(arch, 'n', q_comp_interp, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma()
341 impl->NB_interp_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_interp, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma()
342 impl->NB_deriv[iN] = nontensor_rtc_get_nb(arch, 'n', q_comp_deriv, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma()
343 impl->NB_deriv_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_deriv, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma()
/libCEED/backends/cuda/
H A Dceed-cuda-compile.cpp117 // -arch, since it was only emitting PTX. It will now support actual in CeedCompileCore_Cuda()
120 "-arch=sm_" in CeedCompileCore_Cuda()
122 "-arch=compute_" in CeedCompileCore_Cuda()
327 …ion ? (std::string("-") + std::to_string(llvm_version)) : "") + " -flto=thin --cuda-gpu-arch=sm_" + in CeedCompileCore_Cuda()
/libCEED/
H A DMakefile181 NVCCFLAGS += -arch=$(CUDA_ARCH)
185 HIPCCFLAGS += --offload-arch=$(HIP_ARCH)
/libCEED/rust/libceed-sys/c-src/
H A DMakefile181 NVCCFLAGS += -arch=$(CUDA_ARCH)
185 HIPCCFLAGS += --offload-arch=$(HIP_ARCH)