1e133240eSMatthew G Knepley 2e133240eSMatthew G Knepley /* 3e133240eSMatthew G Knepley Provides an interface to the CUFFT package. 4e133240eSMatthew G Knepley Testing examples can be found in ~src/mat/examples/tests 5e133240eSMatthew G Knepley */ 6e133240eSMatthew G Knepley 7b45d2f2cSJed Brown #include <petsc-private/matimpl.h> /*I "petscmat.h" I*/ 8e133240eSMatthew G Knepley EXTERN_C_BEGIN 9e133240eSMatthew G Knepley #include <cuda.h> 10e133240eSMatthew G Knepley #include <cuda_runtime.h> 11e133240eSMatthew G Knepley #include <cufft.h> 12e133240eSMatthew G Knepley EXTERN_C_END 13e133240eSMatthew G Knepley 14e133240eSMatthew G Knepley typedef struct { 15e133240eSMatthew G Knepley PetscInt ndim; 16e133240eSMatthew G Knepley PetscInt *dim; 17e133240eSMatthew G Knepley cufftHandle p_forward, p_backward; 18e133240eSMatthew G Knepley cufftComplex *devArray; 19e133240eSMatthew G Knepley } Mat_CUFFT; 20e133240eSMatthew G Knepley 21e133240eSMatthew G Knepley #undef __FUNCT__ 22e133240eSMatthew G Knepley #define __FUNCT__ "MatMult_SeqCUFFT" 23e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 24e133240eSMatthew G Knepley { 25e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 26e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 27e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 28e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 29e133240eSMatthew G Knepley cufftResult result; 30e133240eSMatthew G Knepley PetscErrorCode ierr; 31e133240eSMatthew G Knepley 32e133240eSMatthew G Knepley PetscFunctionBegin; 33e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 34e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 35e133240eSMatthew G Knepley if (!cufft->p_forward) { 36e133240eSMatthew G Knepley cufftResult result; 37e133240eSMatthew G Knepley /* create a plan, then execute it */ 38e133240eSMatthew G Knepley switch (ndim) { 39e133240eSMatthew G Knepley case 1: 40e133240eSMatthew G Knepley result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 41e133240eSMatthew G Knepley break; 42e133240eSMatthew G Knepley case 2: 43e133240eSMatthew G Knepley result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 44e133240eSMatthew G Knepley break; 45e133240eSMatthew G Knepley case 3: 46e133240eSMatthew G Knepley result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 47e133240eSMatthew G Knepley break; 48e133240eSMatthew G Knepley default: 49e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 50e133240eSMatthew G Knepley } 51e133240eSMatthew G Knepley } 52e133240eSMatthew G Knepley /* transfer to GPU memory */ 53e133240eSMatthew G Knepley cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 54e133240eSMatthew G Knepley /* execute transform */ 55e133240eSMatthew G Knepley result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS); 56e133240eSMatthew G Knepley /* transfer from GPU memory */ 57e133240eSMatthew G Knepley cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 58e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 59e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 60e133240eSMatthew G Knepley PetscFunctionReturn(0); 61e133240eSMatthew G Knepley } 62e133240eSMatthew G Knepley 63e133240eSMatthew G Knepley #undef __FUNCT__ 64e133240eSMatthew G Knepley #define __FUNCT__ "MatMultTranspose_SeqCUFFT" 65e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 66e133240eSMatthew G Knepley { 67e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 68e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 69e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 70e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 71e133240eSMatthew G Knepley cufftResult result; 72e133240eSMatthew G Knepley PetscErrorCode ierr; 73e133240eSMatthew G Knepley 74e133240eSMatthew G Knepley PetscFunctionBegin; 75e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 76e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 77e133240eSMatthew G Knepley if (!cufft->p_backward) { 78e133240eSMatthew G Knepley /* create a plan, then execute it */ 79e133240eSMatthew G Knepley switch (ndim) { 80e133240eSMatthew G Knepley case 1: 81e133240eSMatthew G Knepley result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 82e133240eSMatthew G Knepley break; 83e133240eSMatthew G Knepley case 2: 84e133240eSMatthew G Knepley result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 85e133240eSMatthew G Knepley break; 86e133240eSMatthew G Knepley case 3: 87e133240eSMatthew G Knepley result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 88e133240eSMatthew G Knepley break; 89e133240eSMatthew G Knepley default: 90e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 91e133240eSMatthew G Knepley } 92e133240eSMatthew G Knepley } 93e133240eSMatthew G Knepley /* transfer to GPU memory */ 94e133240eSMatthew G Knepley cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 95e133240eSMatthew G Knepley /* execute transform */ 96e133240eSMatthew G Knepley result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS); 97e133240eSMatthew G Knepley /* transfer from GPU memory */ 98e133240eSMatthew G Knepley cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 99e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 100e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 101e133240eSMatthew G Knepley PetscFunctionReturn(0); 102e133240eSMatthew G Knepley } 103e133240eSMatthew G Knepley 104e133240eSMatthew G Knepley #undef __FUNCT__ 105e133240eSMatthew G Knepley #define __FUNCT__ "MatDestroy_SeqCUFFT" 106e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 107e133240eSMatthew G Knepley { 108e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 109e133240eSMatthew G Knepley cufftResult result; 110e133240eSMatthew G Knepley PetscErrorCode ierr; 111e133240eSMatthew G Knepley 112e133240eSMatthew G Knepley PetscFunctionBegin; 113e133240eSMatthew G Knepley ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 114e133240eSMatthew G Knepley if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);} 115e133240eSMatthew G Knepley if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);} 116e133240eSMatthew G Knepley cudaFree(cufft->devArray); 117bf0cc555SLisandro Dalcin ierr = PetscFree(A->data);CHKERRQ(ierr); 118bf0cc555SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 119e133240eSMatthew G Knepley PetscFunctionReturn(0); 120e133240eSMatthew G Knepley } 121e133240eSMatthew G Knepley 122e133240eSMatthew G Knepley #undef __FUNCT__ 123e133240eSMatthew G Knepley #define __FUNCT__ "MatCreateSeqCUFFT" 124e133240eSMatthew G Knepley /*@ 125e133240eSMatthew G Knepley MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 126e133240eSMatthew G Knepley 127e133240eSMatthew G Knepley Collective on MPI_Comm 128e133240eSMatthew G Knepley 129e133240eSMatthew G Knepley Input Parameters: 130e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF 131e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform 132e133240eSMatthew G Knepley - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 133e133240eSMatthew G Knepley 134e133240eSMatthew G Knepley Output Parameter: 135e133240eSMatthew G Knepley . A - the matrix 136e133240eSMatthew G Knepley 137e133240eSMatthew G Knepley Options Database Keys: 138e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags 139e133240eSMatthew G Knepley 140e133240eSMatthew G Knepley Level: intermediate 141e133240eSMatthew G Knepley @*/ 1427087cfbeSBarry Smith PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) 143e133240eSMatthew G Knepley { 144e133240eSMatthew G Knepley Mat_CUFFT *cufft; 145e133240eSMatthew G Knepley PetscInt m, d; 146e133240eSMatthew G Knepley PetscErrorCode ierr; 147e133240eSMatthew G Knepley 148e133240eSMatthew G Knepley PetscFunctionBegin; 149e133240eSMatthew G Knepley if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 150e133240eSMatthew G Knepley ierr = MatCreate(comm, A);CHKERRQ(ierr); 151e133240eSMatthew G Knepley m = 1; 152e133240eSMatthew G Knepley for (d = 0; d < ndim; ++d) { 153e133240eSMatthew G Knepley if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 154e133240eSMatthew G Knepley m *= dim[d]; 155e133240eSMatthew G Knepley } 156e133240eSMatthew G Knepley ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 157e133240eSMatthew G Knepley ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 158e133240eSMatthew G Knepley 159e133240eSMatthew G Knepley ierr = PetscNewLog(*A, Mat_CUFFT, &cufft);CHKERRQ(ierr); 160e133240eSMatthew G Knepley (*A)->data = (void*) cufft; 161e133240eSMatthew G Knepley ierr = PetscMalloc((ndim+1)*sizeof(PetscInt), &cufft->dim);CHKERRQ(ierr); 162e133240eSMatthew G Knepley ierr = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr); 163*2205254eSKarl Rupp 164e133240eSMatthew G Knepley cufft->ndim = ndim; 165e133240eSMatthew G Knepley cufft->p_forward = 0; 166e133240eSMatthew G Knepley cufft->p_backward = 0; 167e133240eSMatthew G Knepley cufft->dim[ndim] = m; 168e133240eSMatthew G Knepley 169e133240eSMatthew G Knepley /* GPU memory allocation */ 170e133240eSMatthew G Knepley cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m); 171e133240eSMatthew G Knepley 172e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT; 173e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 174e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE; 175e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT; 176e133240eSMatthew G Knepley 177e133240eSMatthew G Knepley /* get runtime options */ 178e133240eSMatthew G Knepley ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 179e133240eSMatthew G Knepley ierr = PetscOptionsEnd();CHKERRQ(ierr); 180e133240eSMatthew G Knepley PetscFunctionReturn(0); 181e133240eSMatthew G Knepley } 182