xref: /petsc/src/mat/impls/h2opus/math2opussampler.hpp (revision 2fa40bb9206b96114faa7cb222621ec184d31cd2)
1 #include <petscmat.h>
2 #include <h2opus.h>
3 
4 #ifndef __MATH2OPUS_HPP
5 #define __MATH2OPUS_HPP
6 
7 class PetscMatrixSampler : public HMatrixSampler
8 {
9 protected:
10   Mat  A;
11   typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, H2Opus_Real>::type HRealVector;
12   typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, int>::type HIntVector;
13   HIntVector hindexmap;
14   HRealVector hbuffer_in,hbuffer_out;
15 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
16   H2OpusDeviceVector<int> dindexmap;
17   H2OpusDeviceVector<H2Opus_Real> dbuffer_in,dbuffer_out;
18 #endif
19   bool gpusampling;
20   h2opusComputeStream_t stream;
21 
22 private:
23   void Init();
24   void VerifyBuffers(int);
25   void PermuteBuffersIn(int,H2Opus_Real*,H2Opus_Real**,H2Opus_Real*,H2Opus_Real**);
26   void PermuteBuffersOut(int,H2Opus_Real*);
27 
28 public:
29   PetscMatrixSampler();
30   PetscMatrixSampler(Mat);
31   ~PetscMatrixSampler();
32   void SetSamplingMat(Mat);
33   void SetIndexMap(int,int*);
34   void SetGPUSampling(bool);
35   void SetStream(h2opusComputeStream_t);
36   virtual void sample(H2Opus_Real*,H2Opus_Real*,int);
37   Mat GetSamplingMat() { return A; }
38 };
39 
40 void PetscMatrixSampler::Init()
41 {
42   this->A = NULL;
43   this->gpusampling = false;
44   this->stream = NULL;
45 }
46 
47 PetscMatrixSampler::PetscMatrixSampler()
48 {
49   Init();
50 }
51 
52 PetscMatrixSampler::PetscMatrixSampler(Mat A)
53 {
54   Init();
55   SetSamplingMat(A);
56 }
57 
58 void PetscMatrixSampler::SetSamplingMat(Mat A)
59 {
60   PetscErrorCode ierr;
61   PetscMPIInt    size;
62 
63   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRV(ierr);
64   if (size > 1) CHKERRV(PETSC_ERR_SUP);
65   ierr = PetscObjectReference((PetscObject)A);CHKERRV(ierr);
66   ierr = MatDestroy(&this->A);CHKERRV(ierr);
67   this->A = A;
68 }
69 
70 void PetscMatrixSampler::SetStream(h2opusComputeStream_t stream)
71 {
72   this->stream = stream;
73 }
74 
75 void PetscMatrixSampler::SetIndexMap(int n,int *indexmap)
76 {
77   copyVector(this->hindexmap, indexmap, n, H2OPUS_HWTYPE_CPU);
78 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
79   copyVector(this->dindexmap, indexmap, n, H2OPUS_HWTYPE_CPU);
80 #endif
81 }
82 
83 void PetscMatrixSampler::VerifyBuffers(int nv)
84 {
85   if (this->hindexmap.size()) {
86     size_t n = this->hindexmap.size();
87     if (!this->gpusampling) {
88       if (hbuffer_in.size() < (size_t)n * nv)
89           hbuffer_in.resize(n * nv);
90       if (hbuffer_out.size() < (size_t)n * nv)
91           hbuffer_out.resize(n * nv);
92     } else {
93 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
94       if (dbuffer_in.size() < (size_t)n * nv)
95           dbuffer_in.resize(n * nv);
96       if (dbuffer_out.size() < (size_t)n * nv)
97           dbuffer_out.resize(n * nv);
98 #endif
99     }
100   }
101 }
102 
103 void PetscMatrixSampler::PermuteBuffersIn(int nv, H2Opus_Real *v, H2Opus_Real **w, H2Opus_Real *ov, H2Opus_Real **ow)
104 {
105   *w = v;
106   *ow = ov;
107   VerifyBuffers(nv);
108   if (this->hindexmap.size()) {
109     size_t n = this->hindexmap.size();
110     if (!this->gpusampling) {
111       permute_vectors(v, this->hbuffer_in.data(), n, nv, this->hindexmap.data(), 1, H2OPUS_HWTYPE_CPU,
112                       this->stream);
113       *w = this->hbuffer_in.data();
114       *ow = this->hbuffer_out.data();
115     } else {
116 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
117       permute_vectors(v, this->dbuffer_in.data(), n, nv, this->dindexmap.data(), 1, H2OPUS_HWTYPE_GPU,
118                       this->stream);
119       *w = this->dbuffer_in.data();
120       *ow = this->dbuffer_out.data();
121 #endif
122     }
123   }
124 }
125 
126 void PetscMatrixSampler::PermuteBuffersOut(int nv, H2Opus_Real *v)
127 {
128   VerifyBuffers(nv);
129   if (this->hindexmap.size()) {
130     size_t n = this->hindexmap.size();
131     if (!this->gpusampling) {
132       permute_vectors(this->hbuffer_out.data(), v, n, nv, this->hindexmap.data(), 0, H2OPUS_HWTYPE_CPU,
133                       this->stream);
134     } else {
135 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
136       permute_vectors(this->dbuffer_out.data(), v, n, nv, this->dindexmap.data(), 0, H2OPUS_HWTYPE_GPU,
137                       this->stream);
138 #endif
139     }
140   }
141 }
142 
143 void PetscMatrixSampler::SetGPUSampling(bool gpusampling)
144 {
145   this->gpusampling = gpusampling;
146 }
147 
148 PetscMatrixSampler::~PetscMatrixSampler()
149 {
150   PetscErrorCode ierr;
151 
152   ierr = MatDestroy(&A);CHKERRV(ierr);
153 }
154 
155 void PetscMatrixSampler::sample(H2Opus_Real *x, H2Opus_Real *y, int samples)
156 {
157   PetscErrorCode ierr;
158   MPI_Comm       comm = PetscObjectComm((PetscObject)this->A);
159   Mat            X = NULL,Y = NULL;
160   PetscInt       M,N,m,n;
161   H2Opus_Real    *px,*py;
162 
163   if (!this->A) CHKERRV(PETSC_ERR_PLIB);
164   ierr = MatGetSize(this->A,&M,&N);CHKERRV(ierr);
165   ierr = MatGetLocalSize(this->A,&m,&n);CHKERRV(ierr);
166   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRV(ierr);
167   PermuteBuffersIn(samples,x,&px,y,&py);
168   if (!this->gpusampling) {
169     ierr = MatCreateDense(comm,n,PETSC_DECIDE,N,samples,px,&X);CHKERRV(ierr);
170     ierr = MatCreateDense(comm,m,PETSC_DECIDE,M,samples,py,&Y);CHKERRV(ierr);
171   } else {
172 #if defined(PETSC_HAVE_CUDA)
173     ierr = MatCreateDenseCUDA(comm,n,PETSC_DECIDE,N,samples,px,&X);CHKERRV(ierr);
174     ierr = MatCreateDenseCUDA(comm,m,PETSC_DECIDE,M,samples,py,&Y);CHKERRV(ierr);
175 #endif
176   }
177   ierr = PetscLogObjectParent((PetscObject)this->A,(PetscObject)X);CHKERRV(ierr);
178   ierr = PetscLogObjectParent((PetscObject)this->A,(PetscObject)Y);CHKERRV(ierr);
179   ierr = MatMatMult(this->A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRV(ierr);
180 #if defined(PETSC_HAVE_CUDA)
181   if (this->gpusampling) {
182     const PetscScalar *dummy;
183     ierr = MatDenseCUDAGetArrayRead(Y,&dummy);CHKERRV(ierr);
184     ierr = MatDenseCUDARestoreArrayRead(Y,&dummy);CHKERRV(ierr);
185   }
186 #endif
187   PermuteBuffersOut(samples,y);
188   ierr = MatDestroy(&X);CHKERRV(ierr);
189   ierr = MatDestroy(&Y);CHKERRV(ierr);
190 }
191 
192 #endif
193