xref: /petsc/src/mat/impls/cufft/cufft.cu (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94) !
1 
2 /*
3     Provides an interface to the CUFFT package.
4     Testing examples can be found in ~src/mat/tests
5 */
6 
7 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <cuda.h>
10 #include <cuda_runtime.h>
11 #include <cufft.h>
12 EXTERN_C_END
13 
14 typedef struct {
15   PetscInt     ndim;
16   PetscInt     *dim;
17   cufftHandle  p_forward, p_backward;
18   cufftComplex *devArray;
19 } Mat_CUFFT;
20 
21 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
22 {
23   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
24   cufftComplex   *devArray = cufft->devArray;
25   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
26   PetscScalar    *x_array, *y_array;
27   cufftResult    result;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
32   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
33   if (!cufft->p_forward) {
34     cufftResult result;
35     /* create a plan, then execute it */
36     switch (ndim) {
37     case 1:
38       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
39       break;
40     case 2:
41       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
42       break;
43     case 3:
44       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
45       break;
46     default:
47       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
48     }
49   }
50   /* transfer to GPU memory */
51   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
52   /* execute transform */
53   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS);
54   /* transfer from GPU memory */
55   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
56   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
57   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
62 {
63   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
64   cufftComplex   *devArray = cufft->devArray;
65   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
66   PetscScalar    *x_array, *y_array;
67   cufftResult    result;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
72   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
73   if (!cufft->p_backward) {
74     /* create a plan, then execute it */
75     switch (ndim) {
76     case 1:
77       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
78       break;
79     case 2:
80       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
81       break;
82     case 3:
83       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
84       break;
85     default:
86       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
87     }
88   }
89   /* transfer to GPU memory */
90   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
91   /* execute transform */
92   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS);
93   /* transfer from GPU memory */
94   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
95   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
96   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
97   PetscFunctionReturn(0);
98 }
99 
100 PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
101 {
102   Mat_CUFFT      *cufft = (Mat_CUFFT*) A->data;
103   cufftResult    result;
104   PetscErrorCode ierr;
105 
106   PetscFunctionBegin;
107   ierr = PetscFree(cufft->dim);CHKERRQ(ierr);
108   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);}
109   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);}
110   cudaFree(cufft->devArray);
111   ierr = PetscFree(A->data);CHKERRQ(ierr);
112   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 /*@
117   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
118 
119   Collective
120 
121   Input Parameters:
122 + comm - MPI communicator, set to PETSC_COMM_SELF
123 . ndim - the ndim-dimensional transform
124 - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
125 
126   Output Parameter:
127 . A - the matrix
128 
129   Options Database Keys:
130 . -mat_cufft_plannerflags - set CUFFT planner flags
131 
132   Level: intermediate
133 @*/
134 PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
135 {
136   Mat_CUFFT      *cufft;
137   PetscInt       m, d;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
142   ierr = MatCreate(comm, A);CHKERRQ(ierr);
143   m    = 1;
144   for (d = 0; d < ndim; ++d) {
145     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
146     m *= dim[d];
147   }
148   ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr);
149   ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr);
150 
151   ierr       = PetscNewLog(*A,&cufft);CHKERRQ(ierr);
152   (*A)->data = (void*) cufft;
153   ierr       = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr);
154   ierr       = PetscArraycpy(cufft->dim, dim, ndim);CHKERRQ(ierr);
155 
156   cufft->ndim       = ndim;
157   cufft->p_forward  = 0;
158   cufft->p_backward = 0;
159   cufft->dim[ndim]  = m;
160 
161   /* GPU memory allocation */
162   cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);
163 
164   (*A)->ops->mult          = MatMult_SeqCUFFT;
165   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
166   (*A)->assembled          = PETSC_TRUE;
167   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
168 
169   /* get runtime options */
170   ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr);
171   ierr = PetscOptionsEnd();CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174