1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the CUFFT package. 5 Testing examples can be found in ~src/mat/examples/tests 6 */ 7 8 #include "private/matimpl.h" /*I "petscmat.h" I*/ 9 EXTERN_C_BEGIN 10 #include <cuda.h> 11 #include <cuda_runtime.h> 12 #include <cufft.h> 13 EXTERN_C_END 14 15 typedef struct { 16 PetscInt ndim; 17 PetscInt *dim; 18 cufftHandle p_forward, p_backward; 19 cufftComplex *devArray; 20 } Mat_CUFFT; 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatMult_SeqCUFFT" 24 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 25 { 26 Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 27 cufftComplex *devArray = cufft->devArray; 28 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 29 PetscScalar *x_array, *y_array; 30 cufftResult result; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 35 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 36 if (!cufft->p_forward) { 37 cufftResult result; 38 /* create a plan, then execute it */ 39 switch(ndim) { 40 case 1: 41 result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 42 break; 43 case 2: 44 result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 45 break; 46 case 3: 47 result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 48 break; 49 default: 50 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 51 } 52 } 53 /* transfer to GPU memory */ 54 cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 55 /* execute transform */ 56 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS); 57 /* transfer from GPU memory */ 58 cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 59 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 60 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMultTranspose_SeqCUFFT" 66 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 67 { 68 Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 69 cufftComplex *devArray = cufft->devArray; 70 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 71 PetscScalar *x_array, *y_array; 72 cufftResult result; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 77 ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 78 if (!cufft->p_backward) { 79 /* create a plan, then execute it */ 80 switch(ndim) { 81 case 1: 82 result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 83 break; 84 case 2: 85 result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 86 break; 87 case 3: 88 result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 89 break; 90 default: 91 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 92 } 93 } 94 /* transfer to GPU memory */ 95 cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 96 /* execute transform */ 97 result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS); 98 /* transfer from GPU memory */ 99 cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 100 ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 101 ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatDestroy_SeqCUFFT" 107 PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 108 { 109 Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 110 cufftResult result; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 115 if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);} 116 if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);} 117 cudaFree(cufft->devArray); 118 ierr = PetscFree(cufft);CHKERRQ(ierr); 119 ierr = PetscObjectChangeTypeName((PetscObject) A, PETSC_NULL);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatCreateSeqCUFFT" 125 /*@ 126 MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 127 128 Collective on MPI_Comm 129 130 Input Parameters: 131 + comm - MPI communicator, set to PETSC_COMM_SELF 132 . ndim - the ndim-dimensional transform 133 - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 134 135 Output Parameter: 136 . A - the matrix 137 138 Options Database Keys: 139 . -mat_cufft_plannerflags - set CUFFT planner flags 140 141 Level: intermediate 142 @*/ 143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat* A) 144 { 145 Mat_CUFFT *cufft; 146 PetscInt m, d; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 151 ierr = MatCreate(comm, A);CHKERRQ(ierr); 152 m = 1; 153 for(d = 0; d < ndim; ++d){ 154 if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 155 m *= dim[d]; 156 } 157 ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 158 ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 159 160 ierr = PetscNewLog(*A, Mat_CUFFT, &cufft);CHKERRQ(ierr); 161 (*A)->data = (void*) cufft; 162 ierr = PetscMalloc((ndim+1)*sizeof(PetscInt), &cufft->dim);CHKERRQ(ierr); 163 ierr = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr); 164 cufft->ndim = ndim; 165 cufft->p_forward = 0; 166 cufft->p_backward = 0; 167 cufft->dim[ndim] = m; 168 169 /* GPU memory allocation */ 170 cudaMalloc((void **) &cufft->devArray, sizeof(cufftComplex)*m); 171 172 (*A)->ops->mult = MatMult_SeqCUFFT; 173 (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 174 (*A)->assembled = PETSC_TRUE; 175 (*A)->ops->destroy = MatDestroy_SeqCUFFT; 176 177 /* get runtime options */ 178 ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 179 ierr = PetscOptionsEnd();CHKERRQ(ierr); 180 PetscFunctionReturn(0); 181 } 182