Lines Matching refs:d_b

426   float *d_a, *d_b, *d_c;  in runStream()  local
434 PetscCallCUDA(cudaMalloc((void **)&d_b, sizeof(float) * N)); in runStream()
445 set_array<<<dimGrid, dimBlock>>>(d_b, .5f, N); in runStream()
471 STREAM_Scale<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStream()
480 STREAM_Scale_Optimized<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStream()
490 STREAM_Add<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
504 STREAM_Add_Optimized<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
513 STREAM_Triad<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
522 STREAM_Triad_Optimized<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
555 set_array<<<dimGrid, dimBlock>>>(d_b, .5f, N); in runStream()
569 set_array<<<dimGrid, dimBlock>>>(d_b, .5f, N); in runStream()
583 set_array<<<dimGrid, dimBlock>>>(d_b, .5f, N); in runStream()
586 STREAM_Scale<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStream()
587 PetscCallCUDA(cudaMemcpy(h_b, d_b, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
597 set_array<<<dimGrid, dimBlock>>>(d_b, .5f, N); in runStream()
600 STREAM_Add<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
602 PetscCallCUDA(cudaMemcpy(h_b, d_b, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
612 set_array<<<dimGrid, dimBlock>>>(d_b, .5f, N); in runStream()
615 STREAM_Triad<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
617 PetscCallCUDA(cudaMemcpy(h_b, d_b, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
634 PetscCallCUDA(cudaFree(d_b)); in runStream()
641 double *d_a, *d_b, *d_c; in runStreamDouble() local
649 PetscCallCUDA(cudaMalloc((void **)&d_b, sizeof(double) * N)); in runStreamDouble()
660 set_array_double<<<dimGrid, dimBlock>>>(d_b, .5, N); in runStreamDouble()
691 STREAM_Scale_double<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStreamDouble()
700 STREAM_Scale_Optimized_double<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStreamDouble()
709 STREAM_Add_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
718 STREAM_Add_Optimized_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
727 STREAM_Triad_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
736 STREAM_Triad_Optimized_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
769 set_array_double<<<dimGrid, dimBlock>>>(d_b, .5, N); in runStreamDouble()
783 set_array_double<<<dimGrid, dimBlock>>>(d_b, .5, N); in runStreamDouble()
796 set_array_double<<<dimGrid, dimBlock>>>(d_b, .5, N); in runStreamDouble()
799 STREAM_Scale_double<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStreamDouble()
800 PetscCallCUDA(cudaMemcpy(h_b, d_b, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
810 set_array_double<<<dimGrid, dimBlock>>>(d_b, .5, N); in runStreamDouble()
813 STREAM_Add_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
815 PetscCallCUDA(cudaMemcpy(h_b, d_b, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
825 set_array_double<<<dimGrid, dimBlock>>>(d_b, .5, N); in runStreamDouble()
828 STREAM_Triad_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
830 PetscCallCUDA(cudaMemcpy(h_b, d_b, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
847 PetscCallCUDA(cudaFree(d_b)); in runStreamDouble()