1 2 /* 3 Provides an interface to the CUFFT package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <petscdevice.h> 8 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 9 10 typedef struct { 11 PetscInt ndim; 12 PetscInt *dim; 13 cufftHandle p_forward, p_backward; 14 cufftComplex *devArray; 15 } Mat_CUFFT; 16 17 PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) { 18 Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 19 cufftComplex *devArray = cufft->devArray; 20 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 21 PetscScalar *x_array, *y_array; 22 23 PetscFunctionBegin; 24 PetscCall(VecGetArray(x, &x_array)); 25 PetscCall(VecGetArray(y, &y_array)); 26 if (!cufft->p_forward) { 27 /* create a plan, then execute it */ 28 switch (ndim) { 29 case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1)); break; 30 case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C)); break; 31 case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C)); break; 32 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 33 } 34 } 35 /* transfer to GPU memory */ 36 PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice)); 37 /* execute transform */ 38 PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD)); 39 /* transfer from GPU memory */ 40 PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost)); 41 PetscCall(VecRestoreArray(y, &y_array)); 42 PetscCall(VecRestoreArray(x, &x_array)); 43 PetscFunctionReturn(0); 44 } 45 46 PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) { 47 Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 48 cufftComplex *devArray = cufft->devArray; 49 PetscInt ndim = cufft->ndim, *dim = cufft->dim; 50 PetscScalar *x_array, *y_array; 51 52 PetscFunctionBegin; 53 PetscCall(VecGetArray(x, &x_array)); 54 PetscCall(VecGetArray(y, &y_array)); 55 if (!cufft->p_backward) { 56 /* create a plan, then execute it */ 57 switch (ndim) { 58 case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1)); break; 59 case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C)); break; 60 case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C)); break; 61 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 62 } 63 } 64 /* transfer to GPU memory */ 65 PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice)); 66 /* execute transform */ 67 PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE)); 68 /* transfer from GPU memory */ 69 PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost)); 70 PetscCall(VecRestoreArray(y, &y_array)); 71 PetscCall(VecRestoreArray(x, &x_array)); 72 PetscFunctionReturn(0); 73 } 74 75 PetscErrorCode MatDestroy_SeqCUFFT(Mat A) { 76 Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 77 78 PetscFunctionBegin; 79 PetscCall(PetscFree(cufft->dim)); 80 if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward)); 81 if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward)); 82 PetscCallCUDA(cudaFree(cufft->devArray)); 83 PetscCall(PetscFree(A->data)); 84 PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0)); 85 PetscFunctionReturn(0); 86 } 87 88 /*@ 89 MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT 90 91 Collective 92 93 Input Parameters: 94 + comm - MPI communicator, set to `PETSC_COMM_SELF` 95 . ndim - the ndim-dimensional transform 96 - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 97 98 Output Parameter: 99 . A - the matrix 100 101 Options Database Keys: 102 . -mat_cufft_plannerflags - set CUFFT planner flags 103 104 Level: intermediate 105 106 .seealso: `MATSEQCUFFT` 107 @*/ 108 PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) { 109 Mat_CUFFT *cufft; 110 PetscInt m = 1; 111 112 PetscFunctionBegin; 113 PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim); 114 if (ndim) PetscValidIntPointer(dim, 3); 115 PetscValidPointer(A, 4); 116 PetscCall(MatCreate(comm, A)); 117 for (PetscInt d = 0; d < ndim; ++d) { 118 PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]); 119 m *= dim[d]; 120 } 121 PetscCall(MatSetSizes(*A, m, m, m, m)); 122 PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT)); 123 124 PetscCall(PetscNewLog(*A, &cufft)); 125 (*A)->data = (void *)cufft; 126 PetscCall(PetscMalloc1(ndim + 1, &cufft->dim)); 127 PetscCall(PetscArraycpy(cufft->dim, dim, ndim)); 128 129 cufft->ndim = ndim; 130 cufft->p_forward = 0; 131 cufft->p_backward = 0; 132 cufft->dim[ndim] = m; 133 134 /* GPU memory allocation */ 135 PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m)); 136 137 (*A)->ops->mult = MatMult_SeqCUFFT; 138 (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 139 (*A)->assembled = PETSC_TRUE; 140 (*A)->ops->destroy = MatDestroy_SeqCUFFT; 141 142 /* get runtime options ...what options????? */ 143 PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat"); 144 PetscOptionsEnd(); 145 PetscFunctionReturn(0); 146 } 147