Lines Matching refs:d_a
426 float *d_a, *d_b, *d_c; in runStream() local
433 PetscCallCUDA(cudaMalloc((void **)&d_a, sizeof(float) * N)); in runStream()
444 set_array<<<dimGrid, dimBlock>>>(d_a, 2.f, N); in runStream()
454 STREAM_Copy<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
462 STREAM_Copy_Optimized<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
490 STREAM_Add<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
504 STREAM_Add_Optimized<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
513 STREAM_Triad<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
522 STREAM_Triad_Optimized<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
554 set_array<<<dimGrid, dimBlock>>>(d_a, 2.f, N); in runStream()
558 STREAM_Copy<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
559 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
568 set_array<<<dimGrid, dimBlock>>>(d_a, 2.f, N); in runStream()
572 STREAM_Copy_Optimized<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
573 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
582 set_array<<<dimGrid, dimBlock>>>(d_a, 2.f, N); in runStream()
596 set_array<<<dimGrid, dimBlock>>>(d_a, 2.f, N); in runStream()
600 STREAM_Add<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
601 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
611 set_array<<<dimGrid, dimBlock>>>(d_a, 2.f, N); in runStream()
615 STREAM_Triad<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
616 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
633 PetscCallCUDA(cudaFree(d_a)); in runStream()
641 double *d_a, *d_b, *d_c; in runStreamDouble() local
648 PetscCallCUDA(cudaMalloc((void **)&d_a, sizeof(double) * N)); in runStreamDouble()
659 set_array_double<<<dimGrid, dimBlock>>>(d_a, 2., N); in runStreamDouble()
669 STREAM_Copy_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
680 STREAM_Copy_Optimized_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
709 STREAM_Add_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
718 STREAM_Add_Optimized_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
727 STREAM_Triad_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
736 STREAM_Triad_Optimized_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
768 set_array_double<<<dimGrid, dimBlock>>>(d_a, 2., N); in runStreamDouble()
772 STREAM_Copy_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
773 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
782 set_array_double<<<dimGrid, dimBlock>>>(d_a, 2., N); in runStreamDouble()
786 STREAM_Copy_Optimized_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
787 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
809 set_array_double<<<dimGrid, dimBlock>>>(d_a, 2., N); in runStreamDouble()
813 STREAM_Add_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
814 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
824 set_array_double<<<dimGrid, dimBlock>>>(d_a, 2., N); in runStreamDouble()
828 STREAM_Triad_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
829 PetscCallCUDA(cudaMemcpy(h_a, d_a, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
846 PetscCallCUDA(cudaFree(d_a)); in runStreamDouble()