Lines Matching full:-

1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
4 // SPDX-License-Identifier: BSD-2-Clause
15 #include "../cuda/ceed-cuda-common.h"
16 #include "ceed-cuda-ref.h"
18 //------------------------------------------------------------------------------
20 //------------------------------------------------------------------------------
29 *need_sync = has_valid_array && !impl->h_array; in CeedVectorNeedSync_Cuda()
32 *need_sync = has_valid_array && !impl->d_array; in CeedVectorNeedSync_Cuda()
38 //------------------------------------------------------------------------------
40 //------------------------------------------------------------------------------
48 …CeedCheck(impl->h_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid host data to syn… in CeedVectorSyncH2D_Cuda()
52 if (impl->d_array_borrowed) { in CeedVectorSyncH2D_Cuda()
53 impl->d_array = impl->d_array_borrowed; in CeedVectorSyncH2D_Cuda()
54 } else if (impl->d_array_owned) { in CeedVectorSyncH2D_Cuda()
55 impl->d_array = impl->d_array_owned; in CeedVectorSyncH2D_Cuda()
57 CeedCallCuda(CeedVectorReturnCeed(vec), cudaMalloc((void **)&impl->d_array_owned, bytes)); in CeedVectorSyncH2D_Cuda()
58 impl->d_array = impl->d_array_owned; in CeedVectorSyncH2D_Cuda()
60 …CeedCallCuda(CeedVectorReturnCeed(vec), cudaMemcpy(impl->d_array, impl->h_array, bytes, cudaMemcpy… in CeedVectorSyncH2D_Cuda()
64 //------------------------------------------------------------------------------
66 //------------------------------------------------------------------------------
73 …CeedCheck(impl->d_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid device data to s… in CeedVectorSyncD2H_Cuda()
75 if (impl->h_array_borrowed) { in CeedVectorSyncD2H_Cuda()
76 impl->h_array = impl->h_array_borrowed; in CeedVectorSyncD2H_Cuda()
77 } else if (impl->h_array_owned) { in CeedVectorSyncD2H_Cuda()
78 impl->h_array = impl->h_array_owned; in CeedVectorSyncD2H_Cuda()
83 CeedCallBackend(CeedCalloc(length, &impl->h_array_owned)); in CeedVectorSyncD2H_Cuda()
84 impl->h_array = impl->h_array_owned; in CeedVectorSyncD2H_Cuda()
90 …CeedCallCuda(CeedVectorReturnCeed(vec), cudaMemcpy(impl->h_array, impl->d_array, bytes, cudaMemcpy… in CeedVectorSyncD2H_Cuda()
94 //------------------------------------------------------------------------------
96 //------------------------------------------------------------------------------
113 //------------------------------------------------------------------------------
115 //------------------------------------------------------------------------------
120 impl->h_array = NULL; in CeedVectorSetAllInvalid_Cuda()
121 impl->d_array = NULL; in CeedVectorSetAllInvalid_Cuda()
125 //------------------------------------------------------------------------------
127 //------------------------------------------------------------------------------
132 *has_valid_array = impl->h_array || impl->d_array; in CeedVectorHasValidArray_Cuda()
136 //------------------------------------------------------------------------------
138 //------------------------------------------------------------------------------
145 *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned; in CeedVectorHasArrayOfType_Cuda()
148 *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned; in CeedVectorHasArrayOfType_Cuda()
154 //------------------------------------------------------------------------------
156 //------------------------------------------------------------------------------
163 *has_borrowed_array_of_type = impl->h_array_borrowed; in CeedVectorHasBorrowedArrayOfType_Cuda()
166 *has_borrowed_array_of_type = impl->d_array_borrowed; in CeedVectorHasBorrowedArrayOfType_Cuda()
172 //------------------------------------------------------------------------------
174 //------------------------------------------------------------------------------
182 …CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h… in CeedVectorSetArrayHost_Cuda()
183 … (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array)); in CeedVectorSetArrayHost_Cuda()
187 //------------------------------------------------------------------------------
189 //------------------------------------------------------------------------------
199 …viceCeedScalarArray_Cuda(ceed, array, copy_mode, length, (const CeedScalar **)&impl->d_array_owned, in CeedVectorSetArrayDevice_Cuda()
200 … (const CeedScalar **)&impl->d_array_borrowed, (const CeedScalar **)&impl->d_array)); in CeedVectorSetArrayDevice_Cuda()
205 //------------------------------------------------------------------------------
208 //------------------------------------------------------------------------------
223 //------------------------------------------------------------------------------
225 //------------------------------------------------------------------------------
231 //------------------------------------------------------------------------------
233 //------------------------------------------------------------------------------
236 //------------------------------------------------------------------------------
238 //------------------------------------------------------------------------------
251 if (stop == -1) stop = length; in CeedVectorCopyStrided_Cuda()
253 if (impl->d_array) { in CeedVectorCopyStrided_Cuda()
264 …CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64… in CeedVectorCopyStrided_Cuda()
266 …CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64… in CeedVectorCopyStrided_Cuda()
270 CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, stop, step, copy_array)); in CeedVectorCopyStrided_Cuda()
273 impl->h_array = NULL; in CeedVectorCopyStrided_Cuda()
274 } else if (impl->h_array) { in CeedVectorCopyStrided_Cuda()
278 CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, stop, step, copy_array)); in CeedVectorCopyStrided_Cuda()
280 impl->d_array = NULL; in CeedVectorCopyStrided_Cuda()
287 //------------------------------------------------------------------------------
289 //------------------------------------------------------------------------------
295 //------------------------------------------------------------------------------
297 //------------------------------------------------------------------------------
300 //------------------------------------------------------------------------------
302 //------------------------------------------------------------------------------
310 if (!impl->d_array && !impl->h_array) { in CeedVectorSetValue_Cuda()
311 if (impl->d_array_borrowed) { in CeedVectorSetValue_Cuda()
312 impl->d_array = impl->d_array_borrowed; in CeedVectorSetValue_Cuda()
313 } else if (impl->h_array_borrowed) { in CeedVectorSetValue_Cuda()
314 impl->h_array = impl->h_array_borrowed; in CeedVectorSetValue_Cuda()
315 } else if (impl->d_array_owned) { in CeedVectorSetValue_Cuda()
316 impl->d_array = impl->d_array_owned; in CeedVectorSetValue_Cuda()
317 } else if (impl->h_array_owned) { in CeedVectorSetValue_Cuda()
318 impl->h_array = impl->h_array_owned; in CeedVectorSetValue_Cuda()
323 if (impl->d_array) { in CeedVectorSetValue_Cuda()
325 …CeedCallCuda(CeedVectorReturnCeed(vec), cudaMemset(impl->d_array, 0, length * sizeof(CeedScalar))); in CeedVectorSetValue_Cuda()
327 CeedCallBackend(CeedDeviceSetValue_Cuda(impl->d_array, length, val)); in CeedVectorSetValue_Cuda()
329 impl->h_array = NULL; in CeedVectorSetValue_Cuda()
330 } else if (impl->h_array) { in CeedVectorSetValue_Cuda()
331 CeedCallBackend(CeedHostSetValue_Cuda(impl->h_array, length, val)); in CeedVectorSetValue_Cuda()
332 impl->d_array = NULL; in CeedVectorSetValue_Cuda()
337 //------------------------------------------------------------------------------
339 //------------------------------------------------------------------------------
345 //------------------------------------------------------------------------------
347 //------------------------------------------------------------------------------
350 //------------------------------------------------------------------------------
352 //------------------------------------------------------------------------------
360 if (stop == -1) stop = length; in CeedVectorSetValueStrided_Cuda()
361 if (impl->d_array) { in CeedVectorSetValueStrided_Cuda()
362 CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, stop, step, val)); in CeedVectorSetValueStrided_Cuda()
363 impl->h_array = NULL; in CeedVectorSetValueStrided_Cuda()
364 } else if (impl->h_array) { in CeedVectorSetValueStrided_Cuda()
365 CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, stop, step, val)); in CeedVectorSetValueStrided_Cuda()
366 impl->d_array = NULL; in CeedVectorSetValueStrided_Cuda()
373 //------------------------------------------------------------------------------
375 //------------------------------------------------------------------------------
385 (*array) = impl->h_array_borrowed; in CeedVectorTakeArray_Cuda()
386 impl->h_array_borrowed = NULL; in CeedVectorTakeArray_Cuda()
387 impl->h_array = NULL; in CeedVectorTakeArray_Cuda()
390 (*array) = impl->d_array_borrowed; in CeedVectorTakeArray_Cuda()
391 impl->d_array_borrowed = NULL; in CeedVectorTakeArray_Cuda()
392 impl->d_array = NULL; in CeedVectorTakeArray_Cuda()
398 //------------------------------------------------------------------------------
401 //------------------------------------------------------------------------------
411 *array = impl->h_array; in CeedVectorGetArrayCore_Cuda()
414 *array = impl->d_array; in CeedVectorGetArrayCore_Cuda()
420 //------------------------------------------------------------------------------
421 // Get read-only access to a vector via the specified mem_type
422 //------------------------------------------------------------------------------
427 //------------------------------------------------------------------------------
429 //------------------------------------------------------------------------------
438 impl->h_array = *array; in CeedVectorGetArray_Cuda()
441 impl->d_array = *array; in CeedVectorGetArray_Cuda()
447 //------------------------------------------------------------------------------
449 //------------------------------------------------------------------------------
463 if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed; in CeedVectorGetArrayWrite_Cuda()
464 else impl->h_array = impl->h_array_owned; in CeedVectorGetArrayWrite_Cuda()
467 if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed; in CeedVectorGetArrayWrite_Cuda()
468 else impl->d_array = impl->d_array_owned; in CeedVectorGetArrayWrite_Cuda()
474 //------------------------------------------------------------------------------
476 //------------------------------------------------------------------------------
493 // With CUDA 12, we can use the 64-bit integer interface. Prior to that, in CeedVectorNorm_Cuda()
506 #if (CUDA_VERSION >= 12000) // We have CUDA 12, and can use 64-bit integers in CeedVectorNorm_Cuda()
514 CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; in CeedVectorNorm_Cuda()
515 CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; in CeedVectorNorm_Cuda()
530 CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; in CeedVectorNorm_Cuda()
531 CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; in CeedVectorNorm_Cuda()
550 CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; in CeedVectorNorm_Cuda()
551 CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; in CeedVectorNorm_Cuda()
567 CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; in CeedVectorNorm_Cuda()
568 CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; in CeedVectorNorm_Cuda()
585 …CeedCallCuda(ceed, cudaMemcpy(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), cudaMem… in CeedVectorNorm_Cuda()
594 CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; in CeedVectorNorm_Cuda()
595 CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; in CeedVectorNorm_Cuda()
598 …CeedCallCuda(ceed, cudaMemcpy(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), cudaMemcpyD… in CeedVectorNorm_Cuda()
609 …CeedCallCuda(ceed, cudaMemcpy(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), cudaMem… in CeedVectorNorm_Cuda()
618 CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; in CeedVectorNorm_Cuda()
619 CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; in CeedVectorNorm_Cuda()
622 …CeedCallCuda(ceed, cudaMemcpy(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), cudaMemcpyD… in CeedVectorNorm_Cuda()
636 //------------------------------------------------------------------------------
638 //------------------------------------------------------------------------------
646 //------------------------------------------------------------------------------
648 //------------------------------------------------------------------------------
651 //------------------------------------------------------------------------------
653 //------------------------------------------------------------------------------
661 if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Cuda(impl->d_array, length)); in CeedVectorReciprocal_Cuda()
662 if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Cuda(impl->h_array, length)); in CeedVectorReciprocal_Cuda()
666 //------------------------------------------------------------------------------
668 //------------------------------------------------------------------------------
674 //------------------------------------------------------------------------------
676 //------------------------------------------------------------------------------
679 //------------------------------------------------------------------------------
681 //------------------------------------------------------------------------------
689 if (impl->d_array) { in CeedVectorScale_Cuda()
695 …CeedCallCublas(CeedVectorReturnCeed(x), cublasSscal_64(handle, (int64_t)length, &alpha, impl->d_ar… in CeedVectorScale_Cuda()
697 …CeedCallCublas(CeedVectorReturnCeed(x), cublasDscal_64(handle, (int64_t)length, &alpha, impl->d_ar… in CeedVectorScale_Cuda()
700 CeedCallBackend(CeedDeviceScale_Cuda(impl->d_array, alpha, length)); in CeedVectorScale_Cuda()
702 impl->h_array = NULL; in CeedVectorScale_Cuda()
703 } else if (impl->h_array) { in CeedVectorScale_Cuda()
704 CeedCallBackend(CeedHostScale_Cuda(impl->h_array, alpha, length)); in CeedVectorScale_Cuda()
705 impl->d_array = NULL; in CeedVectorScale_Cuda()
710 //------------------------------------------------------------------------------
712 //------------------------------------------------------------------------------
718 //------------------------------------------------------------------------------
720 //------------------------------------------------------------------------------
723 //------------------------------------------------------------------------------
725 //------------------------------------------------------------------------------
734 if (y_impl->d_array) { in CeedVectorAXPY_Cuda()
741 …urnCeed(y), cublasSaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1… in CeedVectorAXPY_Cuda()
743 …urnCeed(y), cublasDaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1… in CeedVectorAXPY_Cuda()
746 CeedCallBackend(CeedDeviceAXPY_Cuda(y_impl->d_array, alpha, x_impl->d_array, length)); in CeedVectorAXPY_Cuda()
748 y_impl->h_array = NULL; in CeedVectorAXPY_Cuda()
749 } else if (y_impl->h_array) { in CeedVectorAXPY_Cuda()
751 CeedCallBackend(CeedHostAXPY_Cuda(y_impl->h_array, alpha, x_impl->h_array, length)); in CeedVectorAXPY_Cuda()
752 y_impl->d_array = NULL; in CeedVectorAXPY_Cuda()
757 //------------------------------------------------------------------------------
759 //------------------------------------------------------------------------------
765 //------------------------------------------------------------------------------
767 //------------------------------------------------------------------------------
770 //------------------------------------------------------------------------------
772 //------------------------------------------------------------------------------
781 if (y_impl->d_array) { in CeedVectorAXPBY_Cuda()
783 CeedCallBackend(CeedDeviceAXPBY_Cuda(y_impl->d_array, alpha, beta, x_impl->d_array, length)); in CeedVectorAXPBY_Cuda()
785 if (y_impl->h_array) { in CeedVectorAXPBY_Cuda()
787 CeedCallBackend(CeedHostAXPBY_Cuda(y_impl->h_array, alpha, beta, x_impl->h_array, length)); in CeedVectorAXPBY_Cuda()
792 //------------------------------------------------------------------------------
794 //------------------------------------------------------------------------------
800 //------------------------------------------------------------------------------
802 //------------------------------------------------------------------------------
805 //------------------------------------------------------------------------------
807 //------------------------------------------------------------------------------
817 if (!w_impl->d_array && !w_impl->h_array) { in CeedVectorPointwiseMult_Cuda()
820 if (w_impl->d_array) { in CeedVectorPointwiseMult_Cuda()
823 …CeedCallBackend(CeedDevicePointwiseMult_Cuda(w_impl->d_array, x_impl->d_array, y_impl->d_array, le… in CeedVectorPointwiseMult_Cuda()
825 if (w_impl->h_array) { in CeedVectorPointwiseMult_Cuda()
828 …CeedCallBackend(CeedHostPointwiseMult_Cuda(w_impl->h_array, x_impl->h_array, y_impl->h_array, leng… in CeedVectorPointwiseMult_Cuda()
833 //------------------------------------------------------------------------------
835 //------------------------------------------------------------------------------
840 CeedCallCuda(CeedVectorReturnCeed(vec), cudaFree(impl->d_array_owned)); in CeedVectorDestroy_Cuda()
841 CeedCallBackend(CeedFree(&impl->h_array_owned)); in CeedVectorDestroy_Cuda()
846 //------------------------------------------------------------------------------
848 //------------------------------------------------------------------------------
878 //------------------------------------------------------------------------------