#include <petscdevice_cuda.h>
#include "ex18.h"

/*
  FillValues - CUDA kernel that fills n consecutive blocks of 9 scalars.

  Input Parameters:
+ n - number of 9-entry blocks to fill
- v - device array with room for at least 9*n scalars

  Notes:
  Launch layout: 1D grid of 1D thread blocks, one thread per 9-entry block.
  Threads whose global index is >= n do nothing, so the grid may be rounded
  up to a whole number of thread blocks.

  Within block i, entry (vi, vj) is stored at offset vi*3 + vj and is set to
  vi + 2*vj.
  NOTE(review): presumably each block is one 3x3 element matrix in row-major
  order -- confirm against the COO pattern set up by the caller.
*/
__global__ void FillValues(PetscInt n, PetscScalar *v)
{
  /* Widen before multiplying so the index does not wrap in 32-bit unsigned
     arithmetic when PetscInt is 64-bit. */
  const PetscInt i = (PetscInt)blockIdx.x * blockDim.x + threadIdx.x;

  if (i < n) {
    /* Compute the block offset in size_t so 9*i cannot overflow a 32-bit PetscInt. */
    PetscScalar *s = v + (size_t)i * 9;

    for (PetscInt vi = 0; vi < 3; vi++) {
      for (PetscInt vj = 0; vj < 3; vj++) s[vi * 3 + vj] = vi + 2 * vj;
    }
  }
}

/*
  FillMatrixCUDACOO - Generates 9*fe->Ne values on the device and passes them
  to MatSetValuesCOO() with INSERT_VALUES.

  Input Parameters:
+ fe - finite element data; only fe->Ne is read here
- A  - matrix receiving the values
       (NOTE(review): its COO pattern is presumably preallocated by the
       caller with 9*fe->Ne entries -- confirm)

  Notes:
  MatSetValuesCOO() is still called when fe->Ne is 0; only the kernel launch
  is skipped, because a launch with a zero-sized grid is an invalid
  configuration.

  The temporary device buffer is released on every exit path that follows a
  successful allocation.
*/
PetscErrorCode FillMatrixCUDACOO(FEStruct *fe, Mat A)
{
  const int      threads = 256; /* threads per block */
  const PetscInt ne      = fe->Ne;
  PetscScalar   *v       = NULL;
  PetscErrorCode ierr;
  cudaError_t    cerr;

  PetscFunctionBeginUser;
  /* Size is computed in size_t to avoid overflowing PetscInt for large ne. */
  PetscCallCUDA(cudaMalloc((void **)&v, sizeof(PetscScalar) * 9 * (size_t)ne));

  if (ne > 0) {
    FillValues<<<(unsigned int)((ne + threads - 1) / threads), threads>>>(ne, v);
    /* Kernel launches do not return a status; query it explicitly. */
    cerr = cudaGetLastError();
    if (cerr != cudaSuccess) {
      (void)cudaFree(v);
      PetscCallCUDA(cerr);
    }
  }

  /* Free the buffer before propagating any error so it is not leaked. */
  ierr = MatSetValuesCOO(A, v, INSERT_VALUES);
  cerr = cudaFree(v);
  PetscCall(ierr);
  PetscCallCUDA(cerr);
  PetscFunctionReturn(0);
}