xref: /petsc/src/mat/impls/cufft/cufft.cu (revision d61e7eb36d836a8e2bf1ad3f41344afcd0d5cf20)
1 #define PETSCMAT_DLL
2 
3 /*
4     Provides an interface to the CUFFT package.
5     Testing examples can be found in ~src/mat/examples/tests
6 */
7 
8 #include "private/matimpl.h"          /*I "petscmat.h" I*/
9 EXTERN_C_BEGIN
10 #include <cuda.h>
11 #include <cuda_runtime.h>
12 #include <cufft.h>
13 EXTERN_C_END
14 
15 typedef struct {
16   PetscInt      ndim;
17   PetscInt     *dim;
18   cufftHandle   p_forward, p_backward;
19   cufftComplex *devArray;
20 } Mat_CUFFT;
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatMult_SeqCUFFT"
24 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
25 {
26   Mat_CUFFT     *cufft    = (Mat_CUFFT *) A->data;
27   cufftComplex  *devArray = cufft->devArray;
28   PetscInt       ndim     = cufft->ndim, *dim = cufft->dim;
29   PetscScalar   *x_array, *y_array;
30   cufftResult    result;
31   PetscErrorCode ierr;
32 
33   PetscFunctionBegin;
34   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
35   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
36   if (!cufft->p_forward) {
37     cufftResult result;
38     /* create a plan, then execute it */
39     switch(ndim) {
40     case 1:
41       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
42       break;
43     case 2:
44       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
45       break;
46     case 3:
47       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
48       break;
49     default:
50       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
51     }
52   }
53   /* transfer to GPU memory */
54   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
55   /* execute transform */
56   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS);
57   /* transfer from GPU memory */
58   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
59   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
60   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatMultTranspose_SeqCUFFT"
66 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
67 {
68   Mat_CUFFT     *cufft    = (Mat_CUFFT *) A->data;
69   cufftComplex  *devArray = cufft->devArray;
70   PetscInt       ndim     = cufft->ndim, *dim = cufft->dim;
71   PetscScalar   *x_array, *y_array;
72   cufftResult    result;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
77   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
78   if (!cufft->p_backward) {
79     /* create a plan, then execute it */
80     switch(ndim) {
81     case 1:
82       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
83       break;
84     case 2:
85       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
86       break;
87     case 3:
88       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
89       break;
90     default:
91       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
92     }
93   }
94   /* transfer to GPU memory */
95   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
96   /* execute transform */
97   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS);
98   /* transfer from GPU memory */
99   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
100   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
101   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatDestroy_SeqCUFFT"
107 PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
108 {
109   Mat_CUFFT     *cufft = (Mat_CUFFT *) A->data;
110   cufftResult    result;
111   PetscErrorCode ierr;
112 
113   PetscFunctionBegin;
114   ierr = PetscFree(cufft->dim);CHKERRQ(ierr);
115   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);}
116   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);}
117   cudaFree(cufft->devArray);
118   ierr = PetscFree(cufft);CHKERRQ(ierr);
119   ierr = PetscObjectChangeTypeName((PetscObject) A, PETSC_NULL);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatCreateSeqCUFFT"
125 /*@
126   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
127 
128   Collective on MPI_Comm
129 
130   Input Parameters:
131 + comm - MPI communicator, set to PETSC_COMM_SELF
132 . ndim - the ndim-dimensional transform
133 - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
134 
135   Output Parameter:
136 . A - the matrix
137 
138   Options Database Keys:
139 . -mat_cufft_plannerflags - set CUFFT planner flags
140 
141   Level: intermediate
142 @*/
143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat* A)
144 {
145   Mat_CUFFT     *cufft;
146   PetscInt       m, d;
147   PetscErrorCode ierr;
148 
149   PetscFunctionBegin;
150   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
151   ierr = MatCreate(comm, A);CHKERRQ(ierr);
152   m = 1;
153   for(d = 0; d < ndim; ++d){
154     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
155     m *= dim[d];
156   }
157   ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr);
158   ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr);
159 
160   ierr = PetscNewLog(*A, Mat_CUFFT, &cufft);CHKERRQ(ierr);
161   (*A)->data = (void*) cufft;
162   ierr = PetscMalloc((ndim+1)*sizeof(PetscInt), &cufft->dim);CHKERRQ(ierr);
163   ierr = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr);
164   cufft->ndim       = ndim;
165   cufft->p_forward  = 0;
166   cufft->p_backward = 0;
167   cufft->dim[ndim]  = m;
168 
169   /* GPU memory allocation */
170   cudaMalloc((void **) &cufft->devArray, sizeof(cufftComplex)*m);
171 
172   (*A)->ops->mult          = MatMult_SeqCUFFT;
173   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
174   (*A)->assembled          = PETSC_TRUE;
175   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
176 
177   /* get runtime options */
178   ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr);
179   ierr = PetscOptionsEnd();CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182