xref: /petsc/src/mat/impls/cufft/cufft.cu (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3     Provides an interface to the CUFFT package.
4     Testing examples can be found in ~src/mat/tests
5 */
6 
7 #include <petscdevice.h>
8 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
9 
10 typedef struct {
11   PetscInt      ndim;
12   PetscInt     *dim;
13   cufftHandle   p_forward, p_backward;
14   cufftComplex *devArray;
15 } Mat_CUFFT;
16 
17 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) {
18   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
19   cufftComplex *devArray = cufft->devArray;
20   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
21   PetscScalar  *x_array, *y_array;
22 
23   PetscFunctionBegin;
24   PetscCall(VecGetArray(x, &x_array));
25   PetscCall(VecGetArray(y, &y_array));
26   if (!cufft->p_forward) {
27     /* create a plan, then execute it */
28     switch (ndim) {
29     case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1)); break;
30     case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C)); break;
31     case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C)); break;
32     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
33     }
34   }
35   /* transfer to GPU memory */
36   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
37   /* execute transform */
38   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD));
39   /* transfer from GPU memory */
40   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
41   PetscCall(VecRestoreArray(y, &y_array));
42   PetscCall(VecRestoreArray(x, &x_array));
43   PetscFunctionReturn(0);
44 }
45 
46 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) {
47   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
48   cufftComplex *devArray = cufft->devArray;
49   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
50   PetscScalar  *x_array, *y_array;
51 
52   PetscFunctionBegin;
53   PetscCall(VecGetArray(x, &x_array));
54   PetscCall(VecGetArray(y, &y_array));
55   if (!cufft->p_backward) {
56     /* create a plan, then execute it */
57     switch (ndim) {
58     case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1)); break;
59     case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C)); break;
60     case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C)); break;
61     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
62     }
63   }
64   /* transfer to GPU memory */
65   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
66   /* execute transform */
67   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE));
68   /* transfer from GPU memory */
69   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
70   PetscCall(VecRestoreArray(y, &y_array));
71   PetscCall(VecRestoreArray(x, &x_array));
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatDestroy_SeqCUFFT(Mat A) {
76   Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
77 
78   PetscFunctionBegin;
79   PetscCall(PetscFree(cufft->dim));
80   if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward));
81   if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward));
82   PetscCallCUDA(cudaFree(cufft->devArray));
83   PetscCall(PetscFree(A->data));
84   PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0));
85   PetscFunctionReturn(0);
86 }
87 
88 /*@
89   MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT
90 
91   Collective
92 
93   Input Parameters:
94 + comm - MPI communicator, set to `PETSC_COMM_SELF`
95 . ndim - the ndim-dimensional transform
96 - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
97 
98   Output Parameter:
99 . A - the matrix
100 
101   Options Database Keys:
102 . -mat_cufft_plannerflags - set CUFFT planner flags
103 
104   Level: intermediate
105 
106 .seealso: `MATSEQCUFFT`
107 @*/
108 PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) {
109   Mat_CUFFT *cufft;
110   PetscInt   m = 1;
111 
112   PetscFunctionBegin;
113   PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim);
114   if (ndim) PetscValidIntPointer(dim, 3);
115   PetscValidPointer(A, 4);
116   PetscCall(MatCreate(comm, A));
117   for (PetscInt d = 0; d < ndim; ++d) {
118     PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]);
119     m *= dim[d];
120   }
121   PetscCall(MatSetSizes(*A, m, m, m, m));
122   PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT));
123 
124   PetscCall(PetscNewLog(*A, &cufft));
125   (*A)->data = (void *)cufft;
126   PetscCall(PetscMalloc1(ndim + 1, &cufft->dim));
127   PetscCall(PetscArraycpy(cufft->dim, dim, ndim));
128 
129   cufft->ndim       = ndim;
130   cufft->p_forward  = 0;
131   cufft->p_backward = 0;
132   cufft->dim[ndim]  = m;
133 
134   /* GPU memory allocation */
135   PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m));
136 
137   (*A)->ops->mult          = MatMult_SeqCUFFT;
138   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
139   (*A)->assembled          = PETSC_TRUE;
140   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
141 
142   /* get runtime options ...what options????? */
143   PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
144   PetscOptionsEnd();
145   PetscFunctionReturn(0);
146 }
147