1 2 /* 3 Provides an interface to the CUFFT package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <petscdevice.h> 8 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 9 10 typedef struct { 11 PetscInt ndim; 12 PetscInt *dim; 13 cufftHandle p_forward, p_backward; 14 cufftComplex *devArray; 15 } Mat_CUFFT; 16 17 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 18 { 19 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 20 cufftComplex *devArray = cufft->devArray; 21 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 22 PetscScalar *x_array, *y_array; 23 cufftResult result; 24 cudaError_t cerr; 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 29 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 30 if (!cufft->p_forward) { 31 cufftResult result; 32 /* create a plan, then execute it */ 33 switch (ndim) { 34 case 1: 35 result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result); 36 break; 37 case 2: 38 result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result); 39 break; 40 case 3: 41 result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result); 42 break; 43 default: 44 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 45 } 46 } 47 /* transfer to GPU memory */ 48 cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr); 49 /* execute transform */ 50 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRCUFFT(result); 51 /* transfer from GPU memory */ 52 cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr); 53 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 54 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 59 { 60 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 61 cufftComplex *devArray = cufft->devArray; 62 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 63 PetscScalar *x_array, *y_array; 64 cufftResult result; 65 cudaError_t cerr; 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 70 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 71 if (!cufft->p_backward) { 72 /* create a plan, then execute it */ 73 switch (ndim) { 74 case 1: 75 result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result); 76 break; 77 case 2: 78 result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result); 79 break; 80 case 3: 81 result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result); 82 break; 83 default: 84 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 85 } 86 } 87 /* transfer to GPU memory */ 88 cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr); 89 /* execute transform */ 90 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRCUFFT(result); 91 /* transfer from GPU memory */ 92 cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr); 93 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 94 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 98 PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 99 { 100 Mat_CUFFT *cufft = (Mat_CUFFT*) A->data; 101 cufftResult result; 102 cudaError_t cerr; 103 PetscErrorCode ierr; 104 105 PetscFunctionBegin; 106 ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 107 if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRCUFFT(result);} 108 if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRCUFFT(result);} 109 cerr = cudaFree(cufft->devArray);CHKERRCUDA(cerr); 110 ierr = PetscFree(A->data);CHKERRQ(ierr); 111 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 /*@ 116 MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 117 118 Collective 119 120 Input Parameters: 121 + comm - MPI communicator, set to PETSC_COMM_SELF 122 . ndim - the ndim-dimensional transform 123 - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 124 125 Output Parameter: 126 . A - the matrix 127 128 Options Database Keys: 129 . -mat_cufft_plannerflags - set CUFFT planner flags 130 131 Level: intermediate 132 @*/ 133 PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) 134 { 135 Mat_CUFFT *cufft; 136 PetscInt m, d; 137 PetscErrorCode ierr; 138 cudaError_t cerr; 139 140 PetscFunctionBegin; 141 if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 142 ierr = MatCreate(comm, A);CHKERRQ(ierr); 143 m = 1; 144 for (d = 0; d < ndim; ++d) { 145 if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 146 m *= dim[d]; 147 } 148 ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 149 ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 150 151 ierr = PetscNewLog(*A,&cufft);CHKERRQ(ierr); 152 (*A)->data = (void*) cufft; 153 ierr = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr); 154 ierr = PetscArraycpy(cufft->dim, dim, ndim);CHKERRQ(ierr); 155 156 cufft->ndim = ndim; 157 cufft->p_forward = 0; 158 cufft->p_backward = 0; 159 cufft->dim[ndim] = m; 160 161 /* GPU memory allocation */ 162 cerr = cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);CHKERRCUDA(cerr); 163 164 (*A)->ops->mult = MatMult_SeqCUFFT; 165 (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 166 (*A)->assembled = PETSC_TRUE; 167 (*A)->ops->destroy = MatDestroy_SeqCUFFT; 168 169 /* get runtime options */ 170 ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 171 ierr = PetscOptionsEnd();CHKERRQ(ierr); 172 PetscFunctionReturn(0); 173 } 174