#pragma once #include /*I I*/ #include <../src/mat/impls/dense/mpi/mpidense.h> #include <../src/mat/impls/dense/seq/cupm/matseqdensecupm.hpp> #include <../src/vec/vec/impls/mpi/cupm/vecmpicupm.hpp> namespace Petsc { namespace mat { namespace cupm { namespace impl { template class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL MatDense_MPI_CUPM : MatDense_CUPM> { public: MATDENSECUPM_HEADER(T, MatDense_MPI_CUPM); private: PETSC_NODISCARD static constexpr Mat_MPIDense *MatIMPLCast_(Mat) noexcept; PETSC_NODISCARD static constexpr MatType MATIMPLCUPM_() noexcept; static PetscErrorCode SetPreallocation_(Mat, PetscDeviceContext, PetscScalar *) noexcept; template static PetscErrorCode Convert_Dispatch_(Mat, MatType, MatReuse, Mat *) noexcept; public: PETSC_NODISCARD static constexpr const char *MatConvert_mpidensecupm_mpidense_C() noexcept; PETSC_NODISCARD static constexpr const char *MatProductSetFromOptions_mpiaij_mpidensecupm_C() noexcept; PETSC_NODISCARD static constexpr const char *MatProductSetFromOptions_mpidensecupm_mpiaij_C() noexcept; PETSC_NODISCARD static constexpr const char *MatProductSetFromOptions_mpiaijcupmsparse_mpidensecupm_C() noexcept; PETSC_NODISCARD static constexpr const char *MatProductSetFromOptions_mpidensecupm_mpiaijcupmsparse_C() noexcept; static PetscErrorCode Create(Mat) noexcept; static PetscErrorCode BindToCPU(Mat, PetscBool) noexcept; static PetscErrorCode Convert_MPIDenseCUPM_MPIDense(Mat, MatType, MatReuse, Mat *) noexcept; static PetscErrorCode Convert_MPIDense_MPIDenseCUPM(Mat, MatType, MatReuse, Mat *) noexcept; template static PetscErrorCode GetArray(Mat, PetscScalar **, PetscDeviceContext = nullptr) noexcept; template static PetscErrorCode RestoreArray(Mat, PetscScalar **, PetscDeviceContext = nullptr) noexcept; private: template static PetscErrorCode GetArrayC_(Mat m, PetscScalar **p) noexcept { return GetArray(m, p); } template static PetscErrorCode RestoreArrayC_(Mat m, PetscScalar **p) noexcept { return RestoreArray(m, p); } public: template static PetscErrorCode GetColumnVec(Mat, PetscInt, Vec *) noexcept; template static PetscErrorCode RestoreColumnVec(Mat, PetscInt, Vec *) noexcept; static PetscErrorCode PlaceArray(Mat, const PetscScalar *) noexcept; static PetscErrorCode ReplaceArray(Mat, const PetscScalar *) noexcept; static PetscErrorCode ResetArray(Mat) noexcept; }; } // namespace impl namespace { // Declare this here so that the functions below can make use of it template inline PetscErrorCode MatCreateMPIDenseCUPM(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A, PetscDeviceContext dctx = nullptr, bool preallocate = true) noexcept { PetscFunctionBegin; PetscCall(impl::MatDense_MPI_CUPM::CreateIMPLDenseCUPM(comm, m, n, M, N, data, A, dctx, preallocate)); PetscFunctionReturn(PETSC_SUCCESS); } } // anonymous namespace namespace impl { // ========================================================================================== // MatDense_MPI_CUPM -- Private API // ========================================================================================== template inline constexpr Mat_MPIDense *MatDense_MPI_CUPM::MatIMPLCast_(Mat m) noexcept { return static_cast(m->data); } template inline constexpr MatType MatDense_MPI_CUPM::MATIMPLCUPM_() noexcept { return MATMPIDENSECUPM(); } // ========================================================================================== template inline PetscErrorCode MatDense_MPI_CUPM::SetPreallocation_(Mat A, PetscDeviceContext dctx, PetscScalar *device_array) noexcept { PetscFunctionBegin; if (auto &mimplA = MatIMPLCast(A)->A) { PetscCall(MatSetType(mimplA, MATSEQDENSECUPM())); PetscCall(MatDense_Seq_CUPM::SetPreallocation(mimplA, dctx, device_array)); } else { PetscCall(MatCreateSeqDenseCUPM(PETSC_COMM_SELF, A->rmap->n, A->cmap->N, device_array, &mimplA, dctx)); } PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode MatDense_MPI_CUPM::Convert_Dispatch_(Mat M, MatType, MatReuse reuse, Mat *newmat) noexcept { PetscFunctionBegin; if (reuse == MAT_INITIAL_MATRIX) { PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); } else if (reuse == MAT_REUSE_MATRIX) { PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); } { const auto B = *newmat; const auto pobj = PetscObjectCast(B); if (to_host) { PetscCall(BindToCPU(B, PETSC_TRUE)); } else { PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUPM())); } PetscCall(PetscStrFreeAllocpy(to_host ? VECSTANDARD : VecMPI_CUPM::VECCUPM(), &B->defaultvectype)); PetscCall(PetscObjectChangeTypeName(pobj, to_host ? MATMPIDENSE : MATMPIDENSECUPM())); // ============================================================ // Composed Ops // ============================================================ MatComposeOp_CUPM(to_host, pobj, MatConvert_mpidensecupm_mpidense_C(), nullptr, Convert_MPIDenseCUPM_MPIDense); MatComposeOp_CUPM(to_host, pobj, MatProductSetFromOptions_mpiaij_mpidensecupm_C(), nullptr, MatProductSetFromOptions_MPIAIJ_MPIDense); MatComposeOp_CUPM(to_host, pobj, MatProductSetFromOptions_mpiaijcupmsparse_mpidensecupm_C(), nullptr, MatProductSetFromOptions_MPIAIJ_MPIDense); MatComposeOp_CUPM(to_host, pobj, MatProductSetFromOptions_mpidensecupm_mpiaij_C(), nullptr, MatProductSetFromOptions_MPIDense_MPIAIJ); MatComposeOp_CUPM(to_host, pobj, MatProductSetFromOptions_mpidensecupm_mpiaijcupmsparse_C(), nullptr, MatProductSetFromOptions_MPIDense_MPIAIJ); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMGetArray_C(), nullptr, GetArrayC_); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMGetArrayRead_C(), nullptr, GetArrayC_); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMGetArrayWrite_C(), nullptr, GetArrayC_); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMRestoreArray_C(), nullptr, RestoreArrayC_); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMRestoreArrayRead_C(), nullptr, RestoreArrayC_); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMRestoreArrayWrite_C(), nullptr, RestoreArrayC_); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMPlaceArray_C(), nullptr, PlaceArray); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMResetArray_C(), nullptr, ResetArray); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMReplaceArray_C(), nullptr, ReplaceArray); MatComposeOp_CUPM(to_host, pobj, MatDenseCUPMSetPreallocation_C(), nullptr, SetPreallocation); if (to_host) { if (auto &m_A = MatIMPLCast(B)->A) PetscCall(MatConvert(m_A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m_A)); B->offloadmask = PETSC_OFFLOAD_CPU; } else { if (auto &m_A = MatIMPLCast(B)->A) { PetscCall(MatConvert(m_A, MATSEQDENSECUPM(), MAT_INPLACE_MATRIX, &m_A)); B->offloadmask = PETSC_OFFLOAD_BOTH; } else { B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; } PetscCall(BindToCPU(B, PETSC_FALSE)); } // ============================================================ // Function Pointer Ops // ============================================================ MatSetOp_CUPM(to_host, B, getdiagonal, MatGetDiagonal_MPIDense, GetDiagonal); MatSetOp_CUPM(to_host, B, bindtocpu, nullptr, BindToCPU); } PetscFunctionReturn(PETSC_SUCCESS); } // ========================================================================================== // MatDense_MPI_CUPM -- Public API // ========================================================================================== template inline constexpr const char *MatDense_MPI_CUPM::MatConvert_mpidensecupm_mpidense_C() noexcept { return T == device::cupm::DeviceType::CUDA ? "MatConvert_mpidensecuda_mpidense_C" : "MatConvert_mpidensehip_mpidense_C"; } template inline constexpr const char *MatDense_MPI_CUPM::MatProductSetFromOptions_mpiaij_mpidensecupm_C() noexcept { return T == device::cupm::DeviceType::CUDA ? "MatProductSetFromOptions_mpiaij_mpidensecuda_C" : "MatProductSetFromOptions_mpiaij_mpidensehip_C"; } template inline constexpr const char *MatDense_MPI_CUPM::MatProductSetFromOptions_mpidensecupm_mpiaij_C() noexcept { return T == device::cupm::DeviceType::CUDA ? "MatProductSetFromOptions_mpidensecuda_mpiaij_C" : "MatProductSetFromOptions_mpidensehip_mpiaij_C"; } template inline constexpr const char *MatDense_MPI_CUPM::MatProductSetFromOptions_mpiaijcupmsparse_mpidensecupm_C() noexcept { return T == device::cupm::DeviceType::CUDA ? "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C" : "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C"; } template inline constexpr const char *MatDense_MPI_CUPM::MatProductSetFromOptions_mpidensecupm_mpiaijcupmsparse_C() noexcept { return T == device::cupm::DeviceType::CUDA ? "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C" : "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C"; } // ========================================================================================== template inline PetscErrorCode MatDense_MPI_CUPM::Create(Mat A) noexcept { PetscFunctionBegin; PetscCall(MatCreate_MPIDense(A)); PetscCall(Convert_MPIDense_MPIDenseCUPM(A, MATMPIDENSECUPM(), MAT_INPLACE_MATRIX, &A)); PetscFunctionReturn(PETSC_SUCCESS); } // ========================================================================================== template inline PetscErrorCode MatDense_MPI_CUPM::BindToCPU(Mat A, PetscBool usehost) noexcept { const auto mimpl = MatIMPLCast(A); const auto pobj = PetscObjectCast(A); PetscFunctionBegin; PetscCheck(!mimpl->vecinuse, PetscObjectComm(pobj), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!mimpl->matinuse, PetscObjectComm(pobj), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); if (const auto mimpl_A = mimpl->A) PetscCall(MatBindToCPU(mimpl_A, usehost)); A->boundtocpu = usehost; PetscCall(PetscStrFreeAllocpy(usehost ? PETSCRANDER48 : PETSCDEVICERAND(), &A->defaultrandtype)); if (!usehost) { PetscBool iscupm; PetscCall(PetscObjectTypeCompare(PetscObjectCast(mimpl->cvec), VecMPI_CUPM::VECMPICUPM(), &iscupm)); if (!iscupm) PetscCall(VecDestroy(&mimpl->cvec)); PetscCall(PetscObjectTypeCompare(PetscObjectCast(mimpl->cmat), MATMPIDENSECUPM(), &iscupm)); if (!iscupm) PetscCall(MatDestroy(&mimpl->cmat)); } MatComposeOp_CUPM(usehost, pobj, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense, GetColumnVec); MatComposeOp_CUPM(usehost, pobj, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense, RestoreColumnVec); MatComposeOp_CUPM(usehost, pobj, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense, GetColumnVec); MatComposeOp_CUPM(usehost, pobj, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense, RestoreColumnVec); MatComposeOp_CUPM(usehost, pobj, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense, GetColumnVec); MatComposeOp_CUPM(usehost, pobj, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense, RestoreColumnVec); MatSetOp_CUPM(usehost, A, shift, MatShift_MPIDense, Shift); if (const auto mimpl_cmat = mimpl->cmat) PetscCall(MatBindToCPU(mimpl_cmat, usehost)); PetscFunctionReturn(PETSC_SUCCESS); } template inline PetscErrorCode MatDense_MPI_CUPM::Convert_MPIDenseCUPM_MPIDense(Mat M, MatType mtype, MatReuse reuse, Mat *newmat) noexcept { PetscFunctionBegin; PetscCall(Convert_Dispatch_(M, mtype, reuse, newmat)); PetscFunctionReturn(PETSC_SUCCESS); } template inline PetscErrorCode MatDense_MPI_CUPM::Convert_MPIDense_MPIDenseCUPM(Mat M, MatType mtype, MatReuse reuse, Mat *newmat) noexcept { PetscFunctionBegin; PetscCall(Convert_Dispatch_(M, mtype, reuse, newmat)); PetscFunctionReturn(PETSC_SUCCESS); } // ========================================================================================== template template inline PetscErrorCode MatDense_MPI_CUPM::GetArray(Mat A, PetscScalar **array, PetscDeviceContext dctx) noexcept { auto &mimplA = MatIMPLCast(A)->A; PetscFunctionBegin; if (!mimplA) PetscCall(MatCreateSeqDenseCUPM(PETSC_COMM_SELF, A->rmap->n, A->cmap->N, nullptr, &mimplA, dctx)); PetscCall(MatDenseCUPMGetArray_Private(mimplA, array)); PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode MatDense_MPI_CUPM::RestoreArray(Mat A, PetscScalar **array, PetscDeviceContext) noexcept { PetscFunctionBegin; PetscCall(MatDenseCUPMRestoreArray_Private(MatIMPLCast(A)->A, array)); PetscFunctionReturn(PETSC_SUCCESS); } // ========================================================================================== template template inline PetscErrorCode MatDense_MPI_CUPM::GetColumnVec(Mat A, PetscInt col, Vec *v) noexcept { using namespace vec::cupm; const auto mimpl = MatIMPLCast(A); const auto mimpl_A = mimpl->A; const auto pobj = PetscObjectCast(A); PetscInt lda; PetscFunctionBegin; PetscCheck(!mimpl->vecinuse, PetscObjectComm(pobj), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!mimpl->matinuse, PetscObjectComm(pobj), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); mimpl->vecinuse = col + 1; if (!mimpl->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &mimpl->cvec)); PetscCall(MatDenseGetLDA(mimpl_A, &lda)); PetscCall(MatDenseCUPMGetArray_Private(mimpl_A, const_cast(&mimpl->ptrinuse))); PetscCall(VecCUPMPlaceArrayAsync(mimpl->cvec, mimpl->ptrinuse + static_cast(col) * static_cast(lda))); if (access == PETSC_MEMORY_ACCESS_READ) PetscCall(VecLockReadPush(mimpl->cvec)); *v = mimpl->cvec; PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode MatDense_MPI_CUPM::RestoreColumnVec(Mat A, PetscInt, Vec *v) noexcept { using namespace vec::cupm; const auto mimpl = MatIMPLCast(A); const auto cvec = mimpl->cvec; PetscFunctionBegin; PetscCheck(mimpl->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); PetscCheck(cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); mimpl->vecinuse = 0; PetscCall(MatDenseCUPMRestoreArray_Private(mimpl->A, const_cast(&mimpl->ptrinuse))); if (access == PETSC_MEMORY_ACCESS_READ) PetscCall(VecLockReadPop(cvec)); PetscCall(VecCUPMResetArrayAsync(cvec)); if (v) *v = nullptr; PetscFunctionReturn(PETSC_SUCCESS); } // ========================================================================================== template inline PetscErrorCode MatDense_MPI_CUPM::PlaceArray(Mat A, const PetscScalar *array) noexcept { const auto mimpl = MatIMPLCast(A); PetscFunctionBegin; PetscCheck(!mimpl->vecinuse, PetscObjectComm(PetscObjectCast(A)), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!mimpl->matinuse, PetscObjectComm(PetscObjectCast(A)), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); PetscCall(MatDenseCUPMPlaceArray(mimpl->A, array)); PetscFunctionReturn(PETSC_SUCCESS); } template inline PetscErrorCode MatDense_MPI_CUPM::ReplaceArray(Mat A, const PetscScalar *array) noexcept { const auto mimpl = MatIMPLCast(A); PetscFunctionBegin; PetscCheck(!mimpl->vecinuse, PetscObjectComm(PetscObjectCast(A)), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!mimpl->matinuse, PetscObjectComm(PetscObjectCast(A)), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); PetscCall(MatDenseCUPMReplaceArray(mimpl->A, array)); PetscFunctionReturn(PETSC_SUCCESS); } template inline PetscErrorCode MatDense_MPI_CUPM::ResetArray(Mat A) noexcept { const auto mimpl = MatIMPLCast(A); PetscFunctionBegin; PetscCheck(!mimpl->vecinuse, PetscObjectComm(PetscObjectCast(A)), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); PetscCheck(!mimpl->matinuse, PetscObjectComm(PetscObjectCast(A)), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); PetscCall(MatDenseCUPMResetArray(mimpl->A)); PetscFunctionReturn(PETSC_SUCCESS); } } // namespace impl namespace { template inline PetscErrorCode MatCreateDenseCUPM(MPI_Comm comm, PetscInt n, PetscInt m, PetscInt N, PetscInt M, PetscScalar *data, Mat *A, PetscDeviceContext dctx = nullptr) noexcept { PetscMPIInt size; PetscFunctionBegin; PetscAssertPointer(A, 7); PetscCallMPI(MPI_Comm_size(comm, &size)); if (size > 1) { PetscCall(MatCreateMPIDenseCUPM(comm, n, m, N, M, data, A, dctx)); } else { if (n == PETSC_DECIDE) n = N; if (m == PETSC_DECIDE) m = M; // It's OK here if both are PETSC_DECIDE since PetscSplitOwnership() will catch that down // the line PetscCall(MatCreateSeqDenseCUPM(comm, n, m, data, A, dctx)); } PetscFunctionReturn(PETSC_SUCCESS); } } // anonymous namespace } // namespace cupm } // namespace mat } // namespace Petsc