Lines Matching refs:mcu

204   const auto   mcu   = MatCUPMCast(m);  in SetPreallocation_()  local
217 if (!mcu->d_user_alloc) PetscCallCUPM(cupmFreeAsync(mcu->d_v, stream)); in SetPreallocation_()
219 mcu->d_user_alloc = PETSC_TRUE; in SetPreallocation_()
220 mcu->d_v = user_device_array; in SetPreallocation_()
224 mcu->d_user_alloc = PETSC_FALSE; in SetPreallocation_()
226 PetscCall(PetscCUPMMallocAsync(&mcu->d_v, size, stream)); in SetPreallocation_()
227 PetscCall(PetscCUPMMemsetAsync(mcu->d_v, 0, size, stream)); in SetPreallocation_()
245 const auto mcu = MatCUPMCast(m); in HostToDevice_() local
249 if (!mcu->d_v) PetscCall(SetPreallocation(m, dctx, nullptr)); in HostToDevice_()
256 const auto dest = mcu->d_v; in HostToDevice_()
334 …static PetscErrorCode ResizeFactLwork(Mat_SeqDenseCUPM *mcu, cupmStream_t stream, F &&cupmSolverCo… in ResizeFactLwork()
340 if (lwork > mcu->d_fact_lwork) { in ResizeFactLwork()
341 mcu->d_fact_lwork = lwork; in ResizeFactLwork()
342 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_work, stream)); in ResizeFactLwork()
343 PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_work, lwork, stream)); in ResizeFactLwork()
350 const auto mcu = MatCUPMCast(A); in FactorPrepare() local
361 if (!mcu->d_fact_info) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_info, 1, stream)); in FactorPrepare()
386 const auto mcu = MatCUPMCast(A); in Factor() local
393 mcu, stream, in Factor()
401 if (!mcu->d_fact_ipiv) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_ipiv, n, stream)); in Factor()
404 …SolverXgetrf(handle, m, n, da.cupmdata(), lda, mcu->d_fact_work, mcu->d_fact_lwork, mcu->d_fact_ip… in Factor()
406 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in Factor()
415 const auto mcu = MatCUPMCast(A); in Solve() local
416 const auto fact_info = mcu->d_fact_info; in Solve()
417 const auto fact_ipiv = mcu->d_fact_ipiv; in Solve()
432 mcu, stream, in Solve()
442 …getrs(handle, op, m, nrhs, da.cupmdata(), lda, fact_ipiv, x, ldx, mcu->d_fact_work, mcu->d_fact_lw… in Solve()
471 const auto mcu = MatCUPMCast(A); in Factor() local
478 mcu, stream, in Factor()
489 …dle, CUPMSOLVER_FILL_MODE_LOWER, n, da.cupmdata(), lda, mcu->d_fact_work, mcu->d_fact_lwork, mcu->… in Factor()
491 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in Factor()
499 if (!mcu->d_fact_ipiv) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_ipiv, n, stream)); in Factor()
500 if (!mcu->d_fact_lwork) { in Factor()
501 …PetscCallCUPMSOLVER(cupmSolverDnXsytrf_bufferSize(handle, n, da.cupmdata(), lda, &mcu->d_fact_lwor… in Factor()
502 PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_work, mcu->d_fact_lwork, stream)); in Factor()
504 if (mcu->d_fact_info) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_info, 1, stream)); in Factor()
506 …andle, CUPMSOLVER_FILL_MODE_LOWER, n, da, lda, mcu->d_fact_ipiv, mcu->d_fact_work, mcu->d_fact_lwo… in Factor()
515 const auto mcu = MatCUPMCast(A); in Solve() local
516 const auto fact_info = mcu->d_fact_info; in Solve()
520 …PetscAssert(!mcu->d_fact_ipiv, PETSC_COMM_SELF, PETSC_ERR_LIB, "%ssytrs not implemented", cupmSolv… in Solve()
531 mcu, stream, in Solve()
541 … CUPMSOLVER_FILL_MODE_LOWER, m, nrhs, da.cupmdata(), lda, x, ldx, mcu->d_fact_work, mcu->d_fact_lw… in Solve()
573 const auto mcu = MatCUPMCast(A); in Factor() local
577 …if (!mcu->workvec) PetscCall(vec::cupm::VecCreateSeqCUPMAsync<T>(PetscObjectComm(PetscObjectCast(A… in Factor()
578 if (!mcu->d_fact_tau) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_tau, min, stream)); in Factor()
582 mcu, stream, in Factor()
591 …mSolverXgeqrf(handle, m, n, da.cupmdata(), lda, mcu->d_fact_tau, mcu->d_fact_work, mcu->d_fact_lwo… in Factor()
593 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in Factor()
604 const auto mcu = MatCUPMCast(A); in Solve() local
605 const auto fact_info = mcu->d_fact_info; in Solve()
606 const auto fact_tau = mcu->d_fact_tau; in Solve()
607 const auto fact_work = mcu->d_fact_work; in Solve()
608 const auto fact_lwork = mcu->d_fact_lwork; in Solve()
938 Mat_SeqDenseCUPM *mcu; in Convert_Dispatch_() local
940 PetscCall(PetscNew(&mcu)); in Convert_Dispatch_()
941 B->spptr = mcu; in Convert_Dispatch_()
1030 if (const auto mcu = MatCUPMCast(A)) { in Reset() local
1033 …PetscCheck(!mcu->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "MatDense%sResetArray() must be … in Reset()
1035 if (!mcu->d_user_alloc) PetscCallCUPM(cupmFreeAsync(mcu->d_v, stream)); in Reset()
1036 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_tau, stream)); in Reset()
1037 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_ipiv, stream)); in Reset()
1038 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_info, stream)); in Reset()
1039 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_work, stream)); in Reset()
1040 PetscCall(VecDestroy(&mcu->workvec)); in Reset()
1233 const auto mcu = MatCUPMCast(A); in PlaceArray() local
1238 …PetscCheck(!mcu->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "MatDense%sResetArray() must be … in PlaceArray()
1245 mcu->unplacedarray = util::exchange(mcu->d_v, const_cast<PetscScalar *>(array)); in PlaceArray()
1246 mcu->d_unplaced_user_alloc = util::exchange(mcu->d_user_alloc, PETSC_TRUE); in PlaceArray()
1254 const auto mcu = MatCUPMCast(A); in ReplaceArray() local
1259 …PetscCheck(!mcu->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ORDER, "MatDense%sResetArray() must be … in ReplaceArray()
1260 if (!mcu->d_user_alloc) { in ReplaceArray()
1264 PetscCallCUPM(cupmFreeAsync(mcu->d_v, stream)); in ReplaceArray()
1266 mcu->d_v = const_cast<PetscScalar *>(array); in ReplaceArray()
1267 mcu->d_user_alloc = PETSC_FALSE; in ReplaceArray()
1275 const auto mcu = MatCUPMCast(A); in ResetArray() local
1286 mcu->d_v = util::exchange(mcu->unplacedarray, nullptr); in ResetArray()
1287 mcu->d_user_alloc = mcu->d_unplaced_user_alloc; in ResetArray()
1784 const auto mcu = MatCUPMCast(A); in InvertFactors() local
1798 …PetscCheck(!mcu->d_fact_ipiv, PETSC_COMM_SELF, PETSC_ERR_LIB, "%sDnsytri not implemented", cupmSol… in InvertFactors()
1807 if (il > mcu->d_fact_lwork) { in InvertFactors()
1808 mcu->d_fact_lwork = il; in InvertFactors()
1809 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_work, stream)); in InvertFactors()
1810 PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_work, il, stream)); in InvertFactors()
1813 …dle, CUPMSOLVER_FILL_MODE_LOWER, n, da.cupmdata(), lda, mcu->d_fact_work, mcu->d_fact_lwork, mcu->… in InvertFactors()
1816 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in InvertFactors()