Searched full:arch (Results 1 – 9 of 9) sorted by relevance
| /libCEED/.github/workflows/ |
| H A D | c-fortan-test-ppc64le.yml | 15 arch: [ppc64le] 24 uses: uraimo/run-on-arch-action@v3 30 arch: ${{ matrix.arch }}
|
| /libCEED/backends/magma/tuning/ |
| H A D | README.md | 8 header files called `<ARCH>_rtc.h`, where `<ARCH>` is the GPU name, as well as a 18 python generate_tuning.py -arch a100 -max-nb 32 -build-cmd "make" -ceed "/gpu/cuda/magma"
|
| H A D | generate_tuning.py | 78 "-arch", 123 with open(f"{script_dir}/{args.arch}_rtc.h", "w") as f: 126 f.write(f"// auto-generated from data on {args.arch}\n\n") 133 f"std::vector<std::array<int, RECORD_LENGTH_RTC> > drtc_t_{args.arch}" + 147 f"std::vector<std::array<int, RECORD_LENGTH_RTC> > drtc_n_{args.arch}" +
|
| /libCEED/backends/sycl/ |
| H A D | online_compiler.hpp | 94 …online_compiler(sycl::info::device_type dev_type, device_arch arch, compiled_code_format fmt = com… in online_compiler() argument 95 …: OutputFormat(fmt), OutputFormatVersion({0, 0}), DeviceType(dev_type), DeviceArch(arch), Is64Bit(… in online_compiler() 138 online_compiler<Lang> &setTargetDeviceArch(device_arch arch) { in setTargetDeviceArch() argument 139 DeviceArch = arch; in setTargetDeviceArch()
|
| /libCEED/backends/magma/ |
| H A D | ceed-magma-gemm-nontensor.cpp | 87 magma_int_t arch = magma_getdevice_arch(); in magma_gemm_nontensor() local 101 gemm_selector(arch, precision, trans, m, n, k, &nbatch, &use_magmablas); in magma_gemm_nontensor()
|
| H A D | ceed-magma-basis.c | 324 magma_int_t arch = magma_getdevice_arch(); in CeedBasisApplyNonTensorCore_Magma() local 340 impl->NB_interp[iN] = nontensor_rtc_get_nb(arch, 'n', q_comp_interp, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma() 341 impl->NB_interp_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_interp, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma() 342 impl->NB_deriv[iN] = nontensor_rtc_get_nb(arch, 'n', q_comp_deriv, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma() 343 impl->NB_deriv_t[iN] = nontensor_rtc_get_nb(arch, 't', q_comp_deriv, P, Q, n_array[iN]); in CeedBasisApplyNonTensorCore_Magma()
|
| /libCEED/backends/cuda/ |
| H A D | ceed-cuda-compile.cpp | 117 // -arch, since it was only emitting PTX. It will now support actual in CeedCompileCore_Cuda() 120 "-arch=sm_" in CeedCompileCore_Cuda() 122 "-arch=compute_" in CeedCompileCore_Cuda() 327 …ion ? (std::string("-") + std::to_string(llvm_version)) : "") + " -flto=thin --cuda-gpu-arch=sm_" + in CeedCompileCore_Cuda()
|
| /libCEED/ |
| H A D | Makefile | 181 NVCCFLAGS += -arch=$(CUDA_ARCH) 185 HIPCCFLAGS += --offload-arch=$(HIP_ARCH)
|
| /libCEED/rust/libceed-sys/c-src/ |
| H A D | Makefile | 181 NVCCFLAGS += -arch=$(CUDA_ARCH) 185 HIPCCFLAGS += --offload-arch=$(HIP_ARCH)
|