xref: /petsc/src/mat/impls/cufft/cufft.cu (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)
1 
2 /*
3     Provides an interface to the CUFFT package.
4     Testing examples can be found in ~src/mat/tests
5 */
6 
7 #include <petscdevice.h>
8 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
9 
10 typedef struct {
11   PetscInt      ndim;
12   PetscInt     *dim;
13   cufftHandle   p_forward, p_backward;
14   cufftComplex *devArray;
15 } Mat_CUFFT;
16 
17 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) {
18   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
19   cufftComplex *devArray = cufft->devArray;
20   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
21   PetscScalar  *x_array, *y_array;
22 
23   PetscFunctionBegin;
24   PetscCall(VecGetArray(x, &x_array));
25   PetscCall(VecGetArray(y, &y_array));
26   if (!cufft->p_forward) {
27     /* create a plan, then execute it */
28     switch (ndim) {
29     case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1)); break;
30     case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C)); break;
31     case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C)); break;
32     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
33     }
34   }
35   /* transfer to GPU memory */
36   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
37   /* execute transform */
38   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD));
39   /* transfer from GPU memory */
40   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
41   PetscCall(VecRestoreArray(y, &y_array));
42   PetscCall(VecRestoreArray(x, &x_array));
43   PetscFunctionReturn(0);
44 }
45 
46 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) {
47   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
48   cufftComplex *devArray = cufft->devArray;
49   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
50   PetscScalar  *x_array, *y_array;
51 
52   PetscFunctionBegin;
53   PetscCall(VecGetArray(x, &x_array));
54   PetscCall(VecGetArray(y, &y_array));
55   if (!cufft->p_backward) {
56     /* create a plan, then execute it */
57     switch (ndim) {
58     case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1)); break;
59     case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C)); break;
60     case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C)); break;
61     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
62     }
63   }
64   /* transfer to GPU memory */
65   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
66   /* execute transform */
67   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE));
68   /* transfer from GPU memory */
69   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
70   PetscCall(VecRestoreArray(y, &y_array));
71   PetscCall(VecRestoreArray(x, &x_array));
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatDestroy_SeqCUFFT(Mat A) {
76   Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
77 
78   PetscFunctionBegin;
79   PetscCall(PetscFree(cufft->dim));
80   if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward));
81   if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward));
82   PetscCallCUDA(cudaFree(cufft->devArray));
83   PetscCall(PetscFree(A->data));
84   PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0));
85   PetscFunctionReturn(0);
86 }
87 
88 /*@
89   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
90 
91   Collective
92 
93   Input Parameters:
94 + comm - MPI communicator, set to PETSC_COMM_SELF
95 . ndim - the ndim-dimensional transform
96 - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
97 
98   Output Parameter:
99 . A - the matrix
100 
101   Options Database Keys:
102 . -mat_cufft_plannerflags - set CUFFT planner flags
103 
104   Level: intermediate
105 @*/
106 PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) {
107   Mat_CUFFT *cufft;
108   PetscInt   m = 1;
109 
110   PetscFunctionBegin;
111   PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim);
112   if (ndim) PetscValidIntPointer(dim, 3);
113   PetscValidPointer(A, 4);
114   PetscCall(MatCreate(comm, A));
115   for (PetscInt d = 0; d < ndim; ++d) {
116     PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]);
117     m *= dim[d];
118   }
119   PetscCall(MatSetSizes(*A, m, m, m, m));
120   PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT));
121 
122   PetscCall(PetscNewLog(*A, &cufft));
123   (*A)->data = (void *)cufft;
124   PetscCall(PetscMalloc1(ndim + 1, &cufft->dim));
125   PetscCall(PetscArraycpy(cufft->dim, dim, ndim));
126 
127   cufft->ndim       = ndim;
128   cufft->p_forward  = 0;
129   cufft->p_backward = 0;
130   cufft->dim[ndim]  = m;
131 
132   /* GPU memory allocation */
133   PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m));
134 
135   (*A)->ops->mult          = MatMult_SeqCUFFT;
136   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
137   (*A)->assembled          = PETSC_TRUE;
138   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
139 
140   /* get runtime options ...what options????? */
141   PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
142   PetscOptionsEnd();
143   PetscFunctionReturn(0);
144 }
145