xref: /petsc/src/mat/impls/cufft/cufft.cu (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*
3     Provides an interface to the CUFFT package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <cuda.h>
10 #include <cuda_runtime.h>
11 #include <cufft.h>
12 EXTERN_C_END
13 
14 typedef struct {
15   PetscInt     ndim;
16   PetscInt     *dim;
17   cufftHandle  p_forward, p_backward;
18   cufftComplex *devArray;
19 } Mat_CUFFT;
20 
21 #undef __FUNCT__
22 #define __FUNCT__ "MatMult_SeqCUFFT"
23 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
24 {
25   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
26   cufftComplex   *devArray = cufft->devArray;
27   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
28   PetscScalar    *x_array, *y_array;
29   cufftResult    result;
30   PetscErrorCode ierr;
31 
32   PetscFunctionBegin;
33   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
34   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
35   if (!cufft->p_forward) {
36     cufftResult result;
37     /* create a plan, then execute it */
38     switch (ndim) {
39     case 1:
40       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
41       break;
42     case 2:
43       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
44       break;
45     case 3:
46       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
47       break;
48     default:
49       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
50     }
51   }
52   /* transfer to GPU memory */
53   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
54   /* execute transform */
55   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS);
56   /* transfer from GPU memory */
57   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
58   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
59   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 
63 #undef __FUNCT__
64 #define __FUNCT__ "MatMultTranspose_SeqCUFFT"
65 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
66 {
67   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
68   cufftComplex   *devArray = cufft->devArray;
69   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
70   PetscScalar    *x_array, *y_array;
71   cufftResult    result;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
76   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
77   if (!cufft->p_backward) {
78     /* create a plan, then execute it */
79     switch (ndim) {
80     case 1:
81       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
82       break;
83     case 2:
84       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
85       break;
86     case 3:
87       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
88       break;
89     default:
90       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
91     }
92   }
93   /* transfer to GPU memory */
94   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
95   /* execute transform */
96   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS);
97   /* transfer from GPU memory */
98   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
99   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
100   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatDestroy_SeqCUFFT"
106 PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
107 {
108   Mat_CUFFT      *cufft = (Mat_CUFFT*) A->data;
109   cufftResult    result;
110   PetscErrorCode ierr;
111 
112   PetscFunctionBegin;
113   ierr = PetscFree(cufft->dim);CHKERRQ(ierr);
114   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);}
115   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);}
116   cudaFree(cufft->devArray);
117   ierr = PetscFree(A->data);CHKERRQ(ierr);
118   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatCreateSeqCUFFT"
124 /*@
125   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
126 
127   Collective on MPI_Comm
128 
129   Input Parameters:
130 + comm - MPI communicator, set to PETSC_COMM_SELF
131 . ndim - the ndim-dimensional transform
132 - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
133 
134   Output Parameter:
135 . A - the matrix
136 
137   Options Database Keys:
138 . -mat_cufft_plannerflags - set CUFFT planner flags
139 
140   Level: intermediate
141 @*/
142 PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
143 {
144   Mat_CUFFT      *cufft;
145   PetscInt       m, d;
146   PetscErrorCode ierr;
147 
148   PetscFunctionBegin;
149   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
150   ierr = MatCreate(comm, A);CHKERRQ(ierr);
151   m    = 1;
152   for (d = 0; d < ndim; ++d) {
153     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
154     m *= dim[d];
155   }
156   ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr);
157   ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr);
158 
159   ierr       = PetscNewLog(*A,&cufft);CHKERRQ(ierr);
160   (*A)->data = (void*) cufft;
161   ierr       = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr);
162   ierr       = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr);
163 
164   cufft->ndim       = ndim;
165   cufft->p_forward  = 0;
166   cufft->p_backward = 0;
167   cufft->dim[ndim]  = m;
168 
169   /* GPU memory allocation */
170   cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);
171 
172   (*A)->ops->mult          = MatMult_SeqCUFFT;
173   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
174   (*A)->assembled          = PETSC_TRUE;
175   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
176 
177   /* get runtime options */
178   ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr);
179   ierr = PetscOptionsEnd();CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182