1 2 /* 3 Provides an interface to the CUFFT package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <cuda.h> 10 #include <cuda_runtime.h> 11 #include <cufft.h> 12 EXTERN_C_END 13 14 typedef struct { 15 PetscInt ndim; 16 PetscInt *dim; 17 cufftHandle p_forward, p_backward; 18 cufftComplex *devArray; 19 } Mat_CUFFT; 20 21 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 22 { 23 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 24 cufftComplex *devArray = cufft->devArray; 25 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 26 PetscScalar *x_array, *y_array; 27 cufftResult result; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 32 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 33 if (!cufft->p_forward) { 34 cufftResult result; 35 /* create a plan, then execute it */ 36 switch (ndim) { 37 case 1: 38 result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 39 break; 40 case 2: 41 result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 42 break; 43 case 3: 44 result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 45 break; 46 default: 47 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 48 } 49 } 50 /* transfer to GPU memory */ 51 cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 52 /* execute transform */ 53 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS); 54 /* transfer from GPU memory */ 55 cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 56 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 57 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 62 { 63 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 64 cufftComplex *devArray = cufft->devArray; 65 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 66 PetscScalar *x_array, *y_array; 67 cufftResult result; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 72 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 73 if (!cufft->p_backward) { 74 /* create a plan, then execute it */ 75 switch (ndim) { 76 case 1: 77 result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 78 break; 79 case 2: 80 result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 81 break; 82 case 3: 83 result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 84 break; 85 default: 86 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 87 } 88 } 89 /* transfer to GPU memory */ 90 cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 91 /* execute transform */ 92 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS); 93 /* transfer from GPU memory */ 94 cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 95 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 96 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 101 { 102 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 103 cufftResult result; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 108 if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);} 109 if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);} 110 cudaFree(cufft->devArray); 111 ierr = PetscFree(A->data);CHKERRQ(ierr); 112 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 /*@ 117 MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 118 119 Collective 120 121 Input Parameters: 122 + comm - MPI communicator, set to PETSC_COMM_SELF 123 . ndim - the ndim-dimensional transform 124 - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 125 126 Output Parameter: 127 . A - the matrix 128 129 Options Database Keys: 130 . -mat_cufft_plannerflags - set CUFFT planner flags 131 132 Level: intermediate 133 @*/ 134 PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) 135 { 136 Mat_CUFFT *cufft; 137 PetscInt m, d; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 142 ierr = MatCreate(comm, A);CHKERRQ(ierr); 143 m = 1; 144 for (d = 0; d < ndim; ++d) { 145 if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 146 m *= dim[d]; 147 } 148 ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 149 ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 150 151 ierr = PetscNewLog(*A,&cufft);CHKERRQ(ierr); 152 (*A)->data = (void*) cufft; 153 ierr = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr); 154 ierr = PetscArraycpy(cufft->dim, dim, ndim);CHKERRQ(ierr); 155 156 cufft->ndim = ndim; 157 cufft->p_forward = 0; 158 cufft->p_backward = 0; 159 cufft->dim[ndim] = m; 160 161 /* GPU memory allocation */ 162 cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m); 163 164 (*A)->ops->mult = MatMult_SeqCUFFT; 165 (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 166 (*A)->assembled = PETSC_TRUE; 167 (*A)->ops->destroy = MatDestroy_SeqCUFFT; 168 169 /* get runtime options */ 170 ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 171 ierr = PetscOptionsEnd();CHKERRQ(ierr); 172 PetscFunctionReturn(0); 173 } 174