#include <petscdevice_cuda.h>
#include "ex18.h"

/*
  FillValues - CUDA kernel that writes one dense 3x3 block of values per element.

  Input Parameters:
+ n - number of elements (one thread handles one element)
- v - device array with room for at least 3 * 3 * n scalars

  Notes:
  Expects a 1D launch with at least n threads in total; surplus threads in the
  last block do nothing. Element i owns v[9 * i] .. v[9 * i + 8], stored
  row-major, and entry (vi, vj) of its block is set to vi + 2 * vj.
*/
__global__ void FillValues(PetscInt n, PetscScalar *v)
{
  /* Widen before multiplying so the flat index cannot wrap in 32-bit unsigned arithmetic */
  const PetscInt i = (PetscInt)blockIdx.x * (PetscInt)blockDim.x + (PetscInt)threadIdx.x;

  if (i < n) {
    PetscScalar *s = &v[3 * 3 * i];

    for (PetscInt vi = 0; vi < 3; vi++) {
      for (PetscInt vj = 0; vj < 3; vj++) s[vi * 3 + vj] = vi + 2 * vj;
    }
  }
}

/*
  FillMatrixCUDACOO - Fills the values of A from a temporary device array produced by FillValues().

  Input Parameters:
+ fe - finite element data; only fe->Ne (the element count) is read here
- A  - matrix that receives the values through MatSetValuesCOO() with INSERT_VALUES

  Notes:
  The value array holds 3 * 3 * fe->Ne scalars, is allocated with cudaMalloc(), and is
  freed before returning.
  NOTE(review): MatSetValuesCOO() presumably requires the COO pattern of A to have been
  set beforehand (e.g. by the caller) - confirm against the rest of the example.
*/
PetscErrorCode FillMatrixCUDACOO(FEStruct *fe, Mat A)
{
  const int    threads = 256;
  PetscScalar *v       = NULL;

  PetscFunctionBeginUser;
  /* Put sizeof() first so the byte count is evaluated in size_t rather than PetscInt */
  PetscCallCUDA(cudaMalloc((void **)&v, sizeof(PetscScalar) * 3 * 3 * fe->Ne));
  if (fe->Ne > 0) {
    /* A grid of zero blocks is an invalid launch configuration, so only launch when there is work */
    FillValues<<<(unsigned int)((fe->Ne + threads - 1) / threads), threads>>>(fe->Ne, v);
    /* Kernel launches return no status; pick up launch-configuration errors explicitly */
    PetscCallCUDA(cudaGetLastError());
  }
  PetscCall(MatSetValuesCOO(A, v, INSERT_VALUES));
  PetscCallCUDA(cudaFree(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}