1e133240eSMatthew G Knepley /*
2e133240eSMatthew G Knepley Provides an interface to the CUFFT package.
3c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests
4e133240eSMatthew G Knepley */
5e133240eSMatthew G Knepley
60e6b6b59SJacob Faibussowitsch #include <petscdevice_cuda.h>
7af0996ceSBarry Smith #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
8e133240eSMatthew G Knepley
9e133240eSMatthew G Knepley typedef struct {
10e133240eSMatthew G Knepley PetscInt ndim;
11e133240eSMatthew G Knepley PetscInt *dim;
12e133240eSMatthew G Knepley cufftHandle p_forward, p_backward;
13e133240eSMatthew G Knepley cufftComplex *devArray;
14e133240eSMatthew G Knepley } Mat_CUFFT;
15e133240eSMatthew G Knepley
MatMult_SeqCUFFT(Mat A,Vec x,Vec y)1666976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
17d71ae5a4SJacob Faibussowitsch {
18e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
19e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray;
20e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim;
21e133240eSMatthew G Knepley PetscScalar *x_array, *y_array;
22e133240eSMatthew G Knepley
23e133240eSMatthew G Knepley PetscFunctionBegin;
249566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &x_array));
259566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array));
26e133240eSMatthew G Knepley if (!cufft->p_forward) {
27e133240eSMatthew G Knepley /* create a plan, then execute it */
28e133240eSMatthew G Knepley switch (ndim) {
29d71ae5a4SJacob Faibussowitsch case 1:
30d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1));
31d71ae5a4SJacob Faibussowitsch break;
32d71ae5a4SJacob Faibussowitsch case 2:
33d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C));
34d71ae5a4SJacob Faibussowitsch break;
35d71ae5a4SJacob Faibussowitsch case 3:
36d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C));
37d71ae5a4SJacob Faibussowitsch break;
38d71ae5a4SJacob Faibussowitsch default:
39d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
40e133240eSMatthew G Knepley }
41e133240eSMatthew G Knepley }
42e133240eSMatthew G Knepley /* transfer to GPU memory */
439566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
44e133240eSMatthew G Knepley /* execute transform */
459566063dSJacob Faibussowitsch PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD));
46e133240eSMatthew G Knepley /* transfer from GPU memory */
479566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array));
499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &x_array));
503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
51e133240eSMatthew G Knepley }
52e133240eSMatthew G Knepley
MatMultTranspose_SeqCUFFT(Mat A,Vec x,Vec y)5366976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
54d71ae5a4SJacob Faibussowitsch {
55e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
56e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray;
57e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim;
58e133240eSMatthew G Knepley PetscScalar *x_array, *y_array;
59e133240eSMatthew G Knepley
60e133240eSMatthew G Knepley PetscFunctionBegin;
619566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &x_array));
629566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array));
63e133240eSMatthew G Knepley if (!cufft->p_backward) {
64e133240eSMatthew G Knepley /* create a plan, then execute it */
65e133240eSMatthew G Knepley switch (ndim) {
66d71ae5a4SJacob Faibussowitsch case 1:
67d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1));
68d71ae5a4SJacob Faibussowitsch break;
69d71ae5a4SJacob Faibussowitsch case 2:
70d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C));
71d71ae5a4SJacob Faibussowitsch break;
72d71ae5a4SJacob Faibussowitsch case 3:
73d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C));
74d71ae5a4SJacob Faibussowitsch break;
75d71ae5a4SJacob Faibussowitsch default:
76d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
77e133240eSMatthew G Knepley }
78e133240eSMatthew G Knepley }
79e133240eSMatthew G Knepley /* transfer to GPU memory */
809566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
81e133240eSMatthew G Knepley /* execute transform */
829566063dSJacob Faibussowitsch PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE));
83e133240eSMatthew G Knepley /* transfer from GPU memory */
849566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array));
869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &x_array));
873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
88e133240eSMatthew G Knepley }
89e133240eSMatthew G Knepley
MatDestroy_SeqCUFFT(Mat A)9066976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
91d71ae5a4SJacob Faibussowitsch {
92e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
93e133240eSMatthew G Knepley
94e133240eSMatthew G Knepley PetscFunctionBegin;
959566063dSJacob Faibussowitsch PetscCall(PetscFree(cufft->dim));
969566063dSJacob Faibussowitsch if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward));
979566063dSJacob Faibussowitsch if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward));
989566063dSJacob Faibussowitsch PetscCallCUDA(cudaFree(cufft->devArray));
999566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data));
1009566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0));
1013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
102e133240eSMatthew G Knepley }
103e133240eSMatthew G Knepley
104e133240eSMatthew G Knepley /*@
10511a5261eSBarry Smith MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT
106e133240eSMatthew G Knepley
107d083f849SBarry Smith Collective
108e133240eSMatthew G Knepley
109e133240eSMatthew G Knepley Input Parameters:
11011a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
111e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform
1122ef1f0ffSBarry Smith - dim - array of size `ndim`, dim[i] contains the vector length in the i-dimension
113e133240eSMatthew G Knepley
114e133240eSMatthew G Knepley Output Parameter:
115e133240eSMatthew G Knepley . A - the matrix
116e133240eSMatthew G Knepley
1172ef1f0ffSBarry Smith Options Database Key:
1182ef1f0ffSBarry Smith . -mat_cufft_plannerflags - set CuFFT planner flags
119e133240eSMatthew G Knepley
120e133240eSMatthew G Knepley Level: intermediate
12111a5261eSBarry Smith
1221cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQCUFFT`
123e133240eSMatthew G Knepley @*/
MatCreateSeqCUFFT(MPI_Comm comm,PetscInt ndim,const PetscInt dim[],Mat * A)124d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
125d71ae5a4SJacob Faibussowitsch {
126e133240eSMatthew G Knepley Mat_CUFFT *cufft;
1275f80ce2aSJacob Faibussowitsch PetscInt m = 1;
128e133240eSMatthew G Knepley
129e133240eSMatthew G Knepley PetscFunctionBegin;
1305f80ce2aSJacob Faibussowitsch PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim);
1314f572ea9SToby Isaac if (ndim) PetscAssertPointer(dim, 3);
1324f572ea9SToby Isaac PetscAssertPointer(A, 4);
1339566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A));
1345f80ce2aSJacob Faibussowitsch for (PetscInt d = 0; d < ndim; ++d) {
1355f80ce2aSJacob Faibussowitsch PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]);
136e133240eSMatthew G Knepley m *= dim[d];
137e133240eSMatthew G Knepley }
1389566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, m, m, m));
1399566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT));
140e133240eSMatthew G Knepley
1414dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&cufft));
142e133240eSMatthew G Knepley (*A)->data = (void *)cufft;
1439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim + 1, &cufft->dim));
1449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(cufft->dim, dim, ndim));
1452205254eSKarl Rupp
146e133240eSMatthew G Knepley cufft->ndim = ndim;
147e133240eSMatthew G Knepley cufft->p_forward = 0;
148e133240eSMatthew G Knepley cufft->p_backward = 0;
149e133240eSMatthew G Knepley cufft->dim[ndim] = m;
150e133240eSMatthew G Knepley
151e133240eSMatthew G Knepley /* GPU memory allocation */
1529566063dSJacob Faibussowitsch PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m));
153e133240eSMatthew G Knepley
154e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT;
155e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
156e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE;
157e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT;
158e133240eSMatthew G Knepley
1595f80ce2aSJacob Faibussowitsch /* get runtime options ...what options????? */
160*f4f49eeaSPierre Jolivet PetscOptionsBegin(comm, ((PetscObject)*A)->prefix, "CUFFT Options", "Mat");
161d0609cedSBarry Smith PetscOptionsEnd();
1623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
163e133240eSMatthew G Knepley }
164