1 2 /* 3 Provides an interface to the CUFFT package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <cuda.h> 10 #include <cuda_runtime.h> 11 #include <cufft.h> 12 EXTERN_C_END 13 14 typedef struct { 15 PetscInt ndim; 16 PetscInt *dim; 17 cufftHandle p_forward, p_backward; 18 cufftComplex *devArray; 19 } Mat_CUFFT; 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatMult_SeqCUFFT" 23 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 24 { 25 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 26 cufftComplex *devArray = cufft->devArray; 27 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 28 PetscScalar *x_array, *y_array; 29 cufftResult result; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 34 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 35 if (!cufft->p_forward) { 36 cufftResult result; 37 /* create a plan, then execute it */ 38 switch (ndim) { 39 case 1: 40 result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 41 break; 42 case 2: 43 result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 44 break; 45 case 3: 46 result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 47 break; 48 default: 49 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 50 } 51 } 52 /* transfer to GPU memory */ 53 cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 54 /* execute transform */ 55 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS); 56 /* transfer from GPU memory */ 57 cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 58 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 59 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 60 PetscFunctionReturn(0); 61 } 62 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatMultTranspose_SeqCUFFT" 65 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 66 { 67 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 68 cufftComplex *devArray = cufft->devArray; 69 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 70 PetscScalar *x_array, *y_array; 71 cufftResult result; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 76 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 77 if (!cufft->p_backward) { 78 /* create a plan, then execute it */ 79 switch (ndim) { 80 case 1: 81 result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 82 break; 83 case 2: 84 result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 85 break; 86 case 3: 87 result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 88 break; 89 default: 90 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 91 } 92 } 93 /* transfer to GPU memory */ 94 cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 95 /* execute transform */ 96 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS); 97 /* transfer from GPU memory */ 98 cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 99 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 100 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_SeqCUFFT" 106 PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 107 { 108 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 109 cufftResult result; 110 PetscErrorCode ierr; 111 112 PetscFunctionBegin; 113 ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 114 if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);} 115 if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);} 116 cudaFree(cufft->devArray); 117 ierr = PetscFree(A->data);CHKERRQ(ierr); 118 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatCreateSeqCUFFT" 124 /*@ 125 MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 126 127 Collective on MPI_Comm 128 129 Input Parameters: 130 + comm - MPI communicator, set to PETSC_COMM_SELF 131 . ndim - the ndim-dimensional transform 132 - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 133 134 Output Parameter: 135 . A - the matrix 136 137 Options Database Keys: 138 . -mat_cufft_plannerflags - set CUFFT planner flags 139 140 Level: intermediate 141 @*/ 142 PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) 143 { 144 Mat_CUFFT *cufft; 145 PetscInt m, d; 146 PetscErrorCode ierr; 147 148 PetscFunctionBegin; 149 if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 150 ierr = MatCreate(comm, A);CHKERRQ(ierr); 151 m = 1; 152 for (d = 0; d < ndim; ++d) { 153 if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 154 m *= dim[d]; 155 } 156 ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 157 ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 158 159 ierr = PetscNewLog(*A,&cufft);CHKERRQ(ierr); 160 (*A)->data = (void*) cufft; 161 ierr = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr); 162 ierr = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr); 163 164 cufft->ndim = ndim; 165 cufft->p_forward = 0; 166 cufft->p_backward = 0; 167 cufft->dim[ndim] = m; 168 169 /* GPU memory allocation */ 170 cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m); 171 172 (*A)->ops->mult = MatMult_SeqCUFFT; 173 (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 174 (*A)->assembled = PETSC_TRUE; 175 (*A)->ops->destroy = MatDestroy_SeqCUFFT; 176 177 /* get runtime options */ 178 ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 179 ierr = PetscOptionsEnd();CHKERRQ(ierr); 180 PetscFunctionReturn(0); 181 } 182