xref: /petsc/src/mat/tutorials/ex18cu.cu (revision 31d78bcd2b98084dc1368b20eb1129c8b9fb39fe)
10e6b6b59SJacob Faibussowitsch #include <petscdevice_cuda.h>
2735d7f90SBarry Smith #include "ex18.h"
3735d7f90SBarry Smith 
/*
  FillValues - CUDA kernel that writes one fixed 3x3 block of values per element.

  Input Parameter:
. n - number of 3x3 blocks to write

  Output Parameter:
. v - device-accessible array with room for at least 9*n entries; block i occupies
      v[9*i] .. v[9*i + 8] and its entry (vi, vj) is stored at offset 3*vi + vj

  Notes:
  Intended for a 1D launch in which thread blockIdx.x * blockDim.x + threadIdx.x handles
  block i; threads whose index is >= n do nothing, so the grid may be rounded up.

  Every block receives the same values, entry (vi, vj) = vi + 2*vj.
*/
__global__ void FillValues(PetscInt n, PetscScalar *v)
{
  /* Form the global index in size_t so the product cannot wrap in 32-bit unsigned arithmetic */
  const size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

  /* The n <= 0 test keeps a negative n from turning into a huge unsigned bound below */
  if (n <= 0 || i >= (size_t)n) return;

  /* Offset is computed in size_t: 9*i overflows a 32-bit PetscInt for large element counts */
  PetscScalar *s = v + (size_t)9 * i;

  for (PetscInt vi = 0; vi < 3; vi++) {
    for (PetscInt vj = 0; vj < 3; vj++) s[vi * 3 + vj] = vi + 2 * vj;
  }
}
15735d7f90SBarry Smith 
/*
  FillMatrixCUDACOO - Generates the COO values on the GPU and passes them to MatSetValuesCOO().

  Input Parameters:
+ fe - the finite element data; only fe->Ne (the number of 3x3 blocks to generate) is read here
- A  - the matrix that receives the values with INSERT_VALUES

  Notes:
  A temporary device buffer of 9 * fe->Ne scalars is allocated, filled by the FillValues kernel,
  handed to MatSetValuesCOO(), and then freed before returning.

  NOTE(review): A presumably already has its COO pattern set (e.g. with MatSetPreallocationCOO())
  with 9 * fe->Ne entries - confirm against the caller.
*/
PetscErrorCode FillMatrixCUDACOO(FEStruct *fe, Mat A)
{
  PetscScalar *v = NULL;

  PetscFunctionBeginUser;
  /* Widen to size_t before multiplying so the byte count cannot overflow a 32-bit integer */
  PetscCallCUDA(cudaMalloc((void **)&v, (size_t)9 * (size_t)fe->Ne * sizeof(PetscScalar)));
  /* A grid of zero thread blocks is an invalid launch configuration, so skip the launch when there is nothing to fill */
  if (fe->Ne > 0) {
    FillValues<<<(fe->Ne + 255) / 256, 256>>>(fe->Ne, v);
    /* Kernel launches do not return a status; query it explicitly so a failed launch is reported here */
    PetscCallCUDA(cudaGetLastError());
  }
  PetscCall(MatSetValuesCOO(A, v, INSERT_VALUES));
  PetscCallCUDA(cudaFree(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
27