xref: /petsc/src/mat/impls/cufft/cufft.cu (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1 
2 /*
3     Provides an interface to the CUFFT package.
4     Testing examples can be found in ~src/mat/tests
5 */
6 
7 #include <petscdevice.h>
8 #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
9 
10 typedef struct {
11   PetscInt     ndim;
12   PetscInt     *dim;
13   cufftHandle  p_forward, p_backward;
14   cufftComplex *devArray;
15 } Mat_CUFFT;
16 
17 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
18 {
19   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
20   cufftComplex   *devArray = cufft->devArray;
21   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
22   PetscScalar    *x_array, *y_array;
23   cufftResult    result;
24   cudaError_t    cerr;
25   PetscErrorCode ierr;
26 
27   PetscFunctionBegin;
28   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
29   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
30   if (!cufft->p_forward) {
31     cufftResult result;
32     /* create a plan, then execute it */
33     switch (ndim) {
34     case 1:
35       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
36       break;
37     case 2:
38       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
39       break;
40     case 3:
41       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
42       break;
43     default:
44       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
45     }
46   }
47   /* transfer to GPU memory */
48   cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
49   /* execute transform */
50   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRCUFFT(result);
51   /* transfer from GPU memory */
52   cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
53   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
54   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
59 {
60   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
61   cufftComplex   *devArray = cufft->devArray;
62   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
63   PetscScalar    *x_array, *y_array;
64   cufftResult    result;
65   cudaError_t    cerr;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
70   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
71   if (!cufft->p_backward) {
72     /* create a plan, then execute it */
73     switch (ndim) {
74     case 1:
75       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
76       break;
77     case 2:
78       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
79       break;
80     case 3:
81       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
82       break;
83     default:
84       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
85     }
86   }
87   /* transfer to GPU memory */
88   cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
89   /* execute transform */
90   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRCUFFT(result);
91   /* transfer from GPU memory */
92   cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
93   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
94   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
95   PetscFunctionReturn(0);
96 }
97 
98 PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
99 {
100   Mat_CUFFT      *cufft = (Mat_CUFFT*) A->data;
101   cufftResult    result;
102   cudaError_t    cerr;
103   PetscErrorCode ierr;
104 
105   PetscFunctionBegin;
106   ierr = PetscFree(cufft->dim);CHKERRQ(ierr);
107   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRCUFFT(result);}
108   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRCUFFT(result);}
109   cerr = cudaFree(cufft->devArray);CHKERRCUDA(cerr);
110   ierr = PetscFree(A->data);CHKERRQ(ierr);
111   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 /*@
116   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
117 
118   Collective
119 
120   Input Parameters:
121 + comm - MPI communicator, set to PETSC_COMM_SELF
122 . ndim - the ndim-dimensional transform
123 - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
124 
125   Output Parameter:
126 . A - the matrix
127 
128   Options Database Keys:
129 . -mat_cufft_plannerflags - set CUFFT planner flags
130 
131   Level: intermediate
132 @*/
133 PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
134 {
135   Mat_CUFFT      *cufft;
136   PetscInt       m, d;
137   PetscErrorCode ierr;
138   cudaError_t    cerr;
139 
140   PetscFunctionBegin;
141   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
142   ierr = MatCreate(comm, A);CHKERRQ(ierr);
143   m    = 1;
144   for (d = 0; d < ndim; ++d) {
145     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
146     m *= dim[d];
147   }
148   ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr);
149   ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr);
150 
151   ierr       = PetscNewLog(*A,&cufft);CHKERRQ(ierr);
152   (*A)->data = (void*) cufft;
153   ierr       = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr);
154   ierr       = PetscArraycpy(cufft->dim, dim, ndim);CHKERRQ(ierr);
155 
156   cufft->ndim       = ndim;
157   cufft->p_forward  = 0;
158   cufft->p_backward = 0;
159   cufft->dim[ndim]  = m;
160 
161   /* GPU memory allocation */
162   cerr = cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);CHKERRCUDA(cerr);
163 
164   (*A)->ops->mult          = MatMult_SeqCUFFT;
165   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
166   (*A)->assembled          = PETSC_TRUE;
167   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
168 
169   /* get runtime options */
170   ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr);
171   ierr = PetscOptionsEnd();CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174