1 #include <petscmat.h> 2 #include <h2opus.h> 3 4 #ifndef __MATH2OPUS_HPP 5 #define __MATH2OPUS_HPP 6 7 class PetscMatrixSampler : public HMatrixSampler 8 { 9 protected: 10 Mat A; 11 typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, H2Opus_Real>::type HRealVector; 12 typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, int>::type HIntVector; 13 HIntVector hindexmap; 14 HRealVector hbuffer_in,hbuffer_out; 15 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 16 H2OpusDeviceVector<int> dindexmap; 17 H2OpusDeviceVector<H2Opus_Real> dbuffer_in,dbuffer_out; 18 #endif 19 bool gpusampling; 20 h2opusComputeStream_t stream; 21 22 private: 23 void Init(); 24 void VerifyBuffers(int); 25 void PermuteBuffersIn(int,H2Opus_Real*,H2Opus_Real**,H2Opus_Real*,H2Opus_Real**); 26 void PermuteBuffersOut(int,H2Opus_Real*); 27 28 public: 29 PetscMatrixSampler(); 30 PetscMatrixSampler(Mat); 31 ~PetscMatrixSampler(); 32 void SetSamplingMat(Mat); 33 void SetIndexMap(int,int*); 34 void SetGPUSampling(bool); 35 void SetStream(h2opusComputeStream_t); 36 virtual void sample(H2Opus_Real*,H2Opus_Real*,int); 37 Mat GetSamplingMat() { return A; } 38 }; 39 40 void PetscMatrixSampler::Init() 41 { 42 this->A = NULL; 43 this->gpusampling = false; 44 this->stream = NULL; 45 } 46 47 PetscMatrixSampler::PetscMatrixSampler() 48 { 49 Init(); 50 } 51 52 PetscMatrixSampler::PetscMatrixSampler(Mat A) 53 { 54 Init(); 55 SetSamplingMat(A); 56 } 57 58 void PetscMatrixSampler::SetSamplingMat(Mat A) 59 { 60 PetscErrorCode ierr; 61 PetscMPIInt size; 62 63 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRV(ierr); 64 if (size > 1) CHKERRV(PETSC_ERR_SUP); 65 ierr = PetscObjectReference((PetscObject)A);CHKERRV(ierr); 66 ierr = MatDestroy(&this->A);CHKERRV(ierr); 67 this->A = A; 68 } 69 70 void PetscMatrixSampler::SetStream(h2opusComputeStream_t stream) 71 { 72 this->stream = stream; 73 } 74 75 void PetscMatrixSampler::SetIndexMap(int n,int *indexmap) 76 { 77 copyVector(this->hindexmap, indexmap, n, H2OPUS_HWTYPE_CPU); 78 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 79 copyVector(this->dindexmap, indexmap, n, H2OPUS_HWTYPE_CPU); 80 #endif 81 } 82 83 void PetscMatrixSampler::VerifyBuffers(int nv) 84 { 85 if (this->hindexmap.size()) { 86 size_t n = this->hindexmap.size(); 87 if (!this->gpusampling) { 88 if (hbuffer_in.size() < (size_t)n * nv) 89 hbuffer_in.resize(n * nv); 90 if (hbuffer_out.size() < (size_t)n * nv) 91 hbuffer_out.resize(n * nv); 92 } else { 93 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 94 if (dbuffer_in.size() < (size_t)n * nv) 95 dbuffer_in.resize(n * nv); 96 if (dbuffer_out.size() < (size_t)n * nv) 97 dbuffer_out.resize(n * nv); 98 #endif 99 } 100 } 101 } 102 103 void PetscMatrixSampler::PermuteBuffersIn(int nv, H2Opus_Real *v, H2Opus_Real **w, H2Opus_Real *ov, H2Opus_Real **ow) 104 { 105 *w = v; 106 *ow = ov; 107 VerifyBuffers(nv); 108 if (this->hindexmap.size()) { 109 size_t n = this->hindexmap.size(); 110 if (!this->gpusampling) { 111 permute_vectors(v, this->hbuffer_in.data(), n, nv, this->hindexmap.data(), 1, H2OPUS_HWTYPE_CPU, 112 this->stream); 113 *w = this->hbuffer_in.data(); 114 *ow = this->hbuffer_out.data(); 115 } else { 116 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 117 permute_vectors(v, this->dbuffer_in.data(), n, nv, this->dindexmap.data(), 1, H2OPUS_HWTYPE_GPU, 118 this->stream); 119 *w = this->dbuffer_in.data(); 120 *ow = this->dbuffer_out.data(); 121 #endif 122 } 123 } 124 } 125 126 void PetscMatrixSampler::PermuteBuffersOut(int nv, H2Opus_Real *v) 127 { 128 VerifyBuffers(nv); 129 if (this->hindexmap.size()) { 130 size_t n = this->hindexmap.size(); 131 if (!this->gpusampling) { 132 permute_vectors(this->hbuffer_out.data(), v, n, nv, this->hindexmap.data(), 0, H2OPUS_HWTYPE_CPU, 133 this->stream); 134 } else { 135 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 136 permute_vectors(this->dbuffer_out.data(), v, n, nv, this->dindexmap.data(), 0, H2OPUS_HWTYPE_GPU, 137 this->stream); 138 #endif 139 } 140 } 141 } 142 143 void PetscMatrixSampler::SetGPUSampling(bool gpusampling) 144 { 145 this->gpusampling = gpusampling; 146 } 147 148 PetscMatrixSampler::~PetscMatrixSampler() 149 { 150 PetscErrorCode ierr; 151 152 ierr = MatDestroy(&A);CHKERRV(ierr); 153 } 154 155 void PetscMatrixSampler::sample(H2Opus_Real *x, H2Opus_Real *y, int samples) 156 { 157 PetscErrorCode ierr; 158 MPI_Comm comm = PetscObjectComm((PetscObject)this->A); 159 Mat X = NULL,Y = NULL; 160 PetscInt M,N,m,n; 161 H2Opus_Real *px,*py; 162 163 if (!this->A) CHKERRV(PETSC_ERR_PLIB); 164 ierr = MatGetSize(this->A,&M,&N);CHKERRV(ierr); 165 ierr = MatGetLocalSize(this->A,&m,&n);CHKERRV(ierr); 166 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRV(ierr); 167 PermuteBuffersIn(samples,x,&px,y,&py); 168 if (!this->gpusampling) { 169 ierr = MatCreateDense(comm,n,PETSC_DECIDE,N,samples,px,&X);CHKERRV(ierr); 170 ierr = MatCreateDense(comm,m,PETSC_DECIDE,M,samples,py,&Y);CHKERRV(ierr); 171 } else { 172 #if defined(PETSC_HAVE_CUDA) 173 ierr = MatCreateDenseCUDA(comm,n,PETSC_DECIDE,N,samples,px,&X);CHKERRV(ierr); 174 ierr = MatCreateDenseCUDA(comm,m,PETSC_DECIDE,M,samples,py,&Y);CHKERRV(ierr); 175 #endif 176 } 177 ierr = PetscLogObjectParent((PetscObject)this->A,(PetscObject)X);CHKERRV(ierr); 178 ierr = PetscLogObjectParent((PetscObject)this->A,(PetscObject)Y);CHKERRV(ierr); 179 ierr = MatMatMult(this->A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRV(ierr); 180 #if defined(PETSC_HAVE_CUDA) 181 if (this->gpusampling) { 182 const PetscScalar *dummy; 183 ierr = MatDenseCUDAGetArrayRead(Y,&dummy);CHKERRV(ierr); 184 ierr = MatDenseCUDARestoreArrayRead(Y,&dummy);CHKERRV(ierr); 185 } 186 #endif 187 PermuteBuffersOut(samples,y); 188 ierr = MatDestroy(&X);CHKERRV(ierr); 189 ierr = MatDestroy(&Y);CHKERRV(ierr); 190 } 191 192 #endif 193