Lines Matching refs:stream
208 cupmStream_t stream; in SetPreallocation_() local
215 PetscCall(GetHandlesFrom_(dctx, &stream)); in SetPreallocation_()
217 if (!mcu->d_user_alloc) PetscCallCUPM(cupmFreeAsync(mcu->d_v, stream)); in SetPreallocation_()
226 PetscCall(PetscCUPMMallocAsync(&mcu->d_v, size, stream)); in SetPreallocation_()
227 PetscCall(PetscCUPMMemsetAsync(mcu->d_v, 0, size, stream)); in SetPreallocation_()
246 cupmStream_t stream; in HostToDevice_() local
250 PetscCall(GetHandlesFrom_(dctx, &stream)); in HostToDevice_()
259 …tscCall(PetscCUPMMemcpy2DAsync(dest, lda, src, lda, nrows, ncols, cupmMemcpyHostToDevice, stream)); in HostToDevice_()
261 PetscCall(PetscCUPMMemcpyAsync(dest, src, lda * ncols, cupmMemcpyHostToDevice, stream)); in HostToDevice_()
283 cupmStream_t stream; in DeviceToHost_() local
287 PetscCall(GetHandlesFrom_(dctx, &stream)); in DeviceToHost_()
295 …tscCall(PetscCUPMMemcpy2DAsync(dest, lda, src, lda, nrows, ncols, cupmMemcpyDeviceToHost, stream)); in DeviceToHost_()
297 PetscCall(PetscCUPMMemcpyAsync(dest, src, lda * ncols, cupmMemcpyDeviceToHost, stream)); in DeviceToHost_()
308 …nse_Seq_CUPM<T>::CheckCUPMSolverInfo_(const cupmBlasInt_t *fact_info, cupmStream_t stream) noexcept in CheckCUPMSolverInfo_() argument
314 PetscCall(PetscCUPMMemcpyAsync(&info, fact_info, 1, cupmMemcpyDeviceToHost, stream)); in CheckCUPMSolverInfo_()
315 if (stream) PetscCallCUPM(cupmStreamSynchronize(stream)); in CheckCUPMSolverInfo_()
334 …static PetscErrorCode ResizeFactLwork(Mat_SeqDenseCUPM *mcu, cupmStream_t stream, F &&cupmSolverCo… in ResizeFactLwork()
342 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_work, stream)); in ResizeFactLwork()
343 PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_work, lwork, stream)); in ResizeFactLwork()
348 static PetscErrorCode FactorPrepare(Mat A, cupmStream_t stream) noexcept in FactorPrepare()
361 if (!mcu->d_fact_info) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_info, 1, stream)); in FactorPrepare()
377 cupmStream_t stream; in Factor() local
383 PetscCall(GetHandles_(&dctx, &handle, &stream)); in Factor()
384 PetscCall(base_type::FactorPrepare(A, stream)); in Factor()
393 mcu, stream, in Factor()
401 if (!mcu->d_fact_ipiv) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_ipiv, n, stream)); in Factor()
406 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in Factor()
413 …nt_t m, cupmBlasInt_t nrhs, cupmBlasInt_t k, PetscDeviceContext dctx, cupmStream_t stream) noexcept in Solve()
432 mcu, stream, in Solve()
443 PetscCall(CheckCUPMSolverInfo_(fact_info, stream)); in Solve()
463 cupmStream_t stream; in Factor() local
468 PetscCall(GetHandles_(&dctx, &handle, &stream)); in Factor()
469 PetscCall(base_type::FactorPrepare(A, stream)); in Factor()
478 mcu, stream, in Factor()
491 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in Factor()
499 if (!mcu->d_fact_ipiv) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_ipiv, n, stream)); in Factor()
502 PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_work, mcu->d_fact_lwork, stream)); in Factor()
504 if (mcu->d_fact_info) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_info, 1, stream)); in Factor()
513 …nt_t m, cupmBlasInt_t nrhs, cupmBlasInt_t k, PetscDeviceContext dctx, cupmStream_t stream) noexcept in Solve()
531 mcu, stream, in Solve()
542 PetscCall(CheckCUPMSolverInfo_(fact_info, stream)); in Solve()
563 cupmStream_t stream; in Factor() local
569 PetscCall(GetHandles_(&dctx, &handle, &stream)); in Factor()
570 PetscCall(base_type::FactorPrepare(A, stream)); in Factor()
578 if (!mcu->d_fact_tau) PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_tau, min, stream)); in Factor()
582 mcu, stream, in Factor()
593 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in Factor()
600 …nt_t m, cupmBlasInt_t nrhs, cupmBlasInt_t k, PetscDeviceContext dctx, cupmStream_t stream) noexcept in Solve()
624 PetscCall(CheckCUPMSolverInfo_(fact_info, stream)); in Solve()
629 PetscCall(CheckCUPMSolverInfo_(fact_info, stream)); in Solve()
652 cupmStream_t stream; in MatSolve_Factored_Dispatch_() local
660 PetscCall(GetHandles_(&dctx, &stream)); in MatSolve_Factored_Dispatch_()
680 PetscCall(PetscCUPMMemcpyAsync(y_array, x_array, m, copy_mode, stream)); in MatSolve_Factored_Dispatch_()
689 …cCall(Solver{}.template Solve<transpose>(A, cupmScalarPtrCast(y_array), m, m, 1, k, dctx, stream)); in MatSolve_Factored_Dispatch_()
706 PetscCall(PetscCUPMMemcpyAsync(yv, y_array, k, copy_mode, stream)); in MatSolve_Factored_Dispatch_()
727 cupmStream_t stream; in MatMatSolve_Factored_Dispatch_() local
734 PetscCall(GetHandles_(&dctx, &stream)); in MatMatSolve_Factored_Dispatch_()
774 PetscCall(PetscCUPMMemcpy2DAsync(y, ldy, b, ldb, m, nrhs, copy_mode, stream)); in MatMatSolve_Factored_Dispatch_()
781 …scCall(Solver{}.template Solve<transpose>(A, cupmScalarPtrCast(y), ldy, m, nrhs, k, dctx, stream)); in MatMatSolve_Factored_Dispatch_()
796 PetscCall(PetscCUPMMemcpy2DAsync(x, ldx, y, ldy, k, nrhs, copy_mode, stream)); in MatMatSolve_Factored_Dispatch_()
798 PetscCallCUPM(cupmFreeAsync(y, stream)); in MatMatSolve_Factored_Dispatch_()
1031 cupmStream_t stream; in Reset() local
1034 PetscCall(GetHandles_(&stream)); in Reset()
1035 if (!mcu->d_user_alloc) PetscCallCUPM(cupmFreeAsync(mcu->d_v, stream)); in Reset()
1036 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_tau, stream)); in Reset()
1037 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_ipiv, stream)); in Reset()
1038 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_info, stream)); in Reset()
1039 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_work, stream)); in Reset()
1261 cupmStream_t stream; in ReplaceArray() local
1263 PetscCall(GetHandles_(&stream)); in ReplaceArray()
1264 PetscCallCUPM(cupmFreeAsync(mcu->d_v, stream)); in ReplaceArray()
1350 cupmStream_t stream; in Copy() local
1352 PetscCall(GetHandles_(&dctx, &stream)); in Copy()
1365 …etscCUPMMemcpy2DAsync(vb.data(), lda_b, va.data(), lda_a, m, n, cupmMemcpyDeviceToDevice, stream)); in Copy()
1367 … PetscCall(PetscCUPMMemcpyAsync(vb.data(), va.data(), m * n, cupmMemcpyDeviceToDevice, stream)); in Copy()
1381 cupmStream_t stream; in ZeroEntries() local
1384 PetscCall(GetHandles_(&dctx, &stream)); in ZeroEntries()
1393 PetscCall(PetscCUPMMemset2DAsync(va.data(), lda, 0, ma, na, stream)); in ZeroEntries()
1395 PetscCall(PetscCUPMMemsetAsync(va.data(), 0, ma * na, stream)); in ZeroEntries()
1465 cupmStream_t stream; in Conjugate() local
1469 PetscCall(GetHandles_(&dctx, &stream)); in Conjugate()
1474 cupmStream_t stream; in Conjugate() local
1475 PetscCall(GetHandlesFrom_(dctx, &stream)); in Conjugate()
1484 stream, in Conjugate()
1497 stream, in Conjugate()
1526 cupmStream_t stream; in Scale() local
1528 PetscCall(GetHandlesFrom_(dctx, &stream)); in Scale()
1535 stream, in Scale()
1574 cupmStream_t stream; in AXPY() local
1576 PetscCall(GetHandlesFrom_(dctx, &stream)); in AXPY()
1584 stream, in AXPY()
1667 cupmStream_t stream; in GetColumnVector() local
1671 PetscCall(GetHandles_(&dctx, &stream)); in GetColumnVector()
1677 …MMemcpyAsync(x.data(), col_offset(DeviceArrayRead(dctx, A)), n, cupmMemcpyDeviceToDevice, stream)); in GetColumnVector()
1679 …cCUPMMemcpyAsync(x.data(), col_offset(HostArrayRead(dctx, A)), n, cupmMemcpyHostToDevice, stream)); in GetColumnVector()
1689 …(PetscCUPMMemcpyAsync(x, col_offset(DeviceArrayRead(dctx, A)), n, cupmMemcpyDeviceToHost, stream)); in GetColumnVector()
1788 cupmStream_t stream; in InvertFactors() local
1800 PetscCall(GetHandles_(&dctx, &handle, &stream)); in InvertFactors()
1809 PetscCallCUPM(cupmFreeAsync(mcu->d_fact_work, stream)); in InvertFactors()
1810 PetscCall(PetscCUPMMallocAsync(&mcu->d_fact_work, il, stream)); in InvertFactors()
1816 PetscCall(CheckCUPMSolverInfo_(mcu->d_fact_info, stream)); in InvertFactors()