Lines Matching refs:d_c

426   float *d_a, *d_b, *d_c;  in runStream()  local
435 PetscCallCUDA(cudaMalloc((void **)&d_c, sizeof(float) * N)); in runStream()
446 set_array<<<dimGrid, dimBlock>>>(d_c, .5f, N); in runStream()
454 STREAM_Copy<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
462 STREAM_Copy_Optimized<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
471 STREAM_Scale<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStream()
480 STREAM_Scale_Optimized<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStream()
490 STREAM_Add<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
504 STREAM_Add_Optimized<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
513 STREAM_Triad<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
522 STREAM_Triad_Optimized<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
556 set_array<<<dimGrid, dimBlock>>>(d_c, .5f, N); in runStream()
558 STREAM_Copy<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
560 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
570 set_array<<<dimGrid, dimBlock>>>(d_c, .5f, N); in runStream()
572 STREAM_Copy_Optimized<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStream()
574 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
584 set_array<<<dimGrid, dimBlock>>>(d_c, .5f, N); in runStream()
586 STREAM_Scale<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStream()
588 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
598 set_array<<<dimGrid, dimBlock>>>(d_c, .5f, N); in runStream()
600 STREAM_Add<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStream()
603 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
613 set_array<<<dimGrid, dimBlock>>>(d_c, .5f, N); in runStream()
615 STREAM_Triad<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStream()
618 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(float) * N, cudaMemcpyDeviceToHost)); in runStream()
635 PetscCallCUDA(cudaFree(d_c)); in runStream()
641 double *d_a, *d_b, *d_c; in runStreamDouble() local
650 PetscCallCUDA(cudaMalloc((void **)&d_c, sizeof(double) * N)); in runStreamDouble()
661 set_array_double<<<dimGrid, dimBlock>>>(d_c, .5, N); in runStreamDouble()
669 STREAM_Copy_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
680 STREAM_Copy_Optimized_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
691 STREAM_Scale_double<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStreamDouble()
700 STREAM_Scale_Optimized_double<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStreamDouble()
709 STREAM_Add_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
718 STREAM_Add_Optimized_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
727 STREAM_Triad_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
736 STREAM_Triad_Optimized_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
770 set_array_double<<<dimGrid, dimBlock>>>(d_c, .5, N); in runStreamDouble()
772 STREAM_Copy_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
774 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
784 set_array_double<<<dimGrid, dimBlock>>>(d_c, .5, N); in runStreamDouble()
786 STREAM_Copy_Optimized_double<<<dimGrid, dimBlock>>>(d_a, d_c, N); in runStreamDouble()
788 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
797 set_array_double<<<dimGrid, dimBlock>>>(d_c, .5, N); in runStreamDouble()
799 STREAM_Scale_double<<<dimGrid, dimBlock>>>(d_b, d_c, scalar, N); in runStreamDouble()
801 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
811 set_array_double<<<dimGrid, dimBlock>>>(d_c, .5, N); in runStreamDouble()
813 STREAM_Add_double<<<dimGrid, dimBlock>>>(d_a, d_b, d_c, N); in runStreamDouble()
816 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
826 set_array_double<<<dimGrid, dimBlock>>>(d_c, .5, N); in runStreamDouble()
828 STREAM_Triad_double<<<dimGrid, dimBlock>>>(d_b, d_c, d_a, scalar, N); in runStreamDouble()
831 PetscCallCUDA(cudaMemcpy(h_c, d_c, sizeof(double) * N, cudaMemcpyDeviceToHost)); in runStreamDouble()
848 PetscCallCUDA(cudaFree(d_c)); in runStreamDouble()