xref: /petsc/src/mat/impls/h2opus/cuda/math2opusutils.cu (revision e9c2de13f1527bfd8ec0d39a07e637e43d1d9c6b)
1 #include <petsc/private/matimpl.h>
2 #include <petsc/private/vecimpl.h>
3 #include <petscsf.h>
4 #if defined(PETSC_HAVE_CUDA)
5   #include <thrust/for_each.h>
6   #include <thrust/device_vector.h>
7   #include <thrust/execution_policy.h>
8 #endif
9 
MatDenseGetH2OpusStridedSF(Mat A,PetscSF h2sf,PetscSF * osf)10 PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
11 {
12   PetscSF asf;
13 
14   PetscFunctionBegin;
15   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
16   PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2);
17   PetscAssertPointer(osf, 3);
18   PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
19   if (!asf) {
20     PetscInt lda;
21 
22     PetscCall(MatDenseGetLDA(A, &lda));
23     PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
24     PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
25     PetscCall(PetscObjectDereference((PetscObject)asf));
26   }
27   *osf = asf;
28   PetscFunctionReturn(PETSC_SUCCESS);
29 }
30 
31 #if defined(PETSC_HAVE_CUDA)
32 struct SignVector_Functor {
33   const PetscScalar *v;
34   PetscScalar       *s;
SignVector_FunctorSignVector_Functor35   SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }
36 
operator ()SignVector_Functor37   __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
38 };
39 #endif
40 
VecSign(Vec v,Vec s)41 PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
42 {
43   const PetscScalar *av;
44   PetscScalar       *as;
45   PetscInt           i, n;
46 #if defined(PETSC_HAVE_CUDA)
47   PetscBool viscuda, siscuda;
48 #endif
49 
50   PetscFunctionBegin;
51   PetscValidHeaderSpecific(v, VEC_CLASSID, 1);
52   PetscValidHeaderSpecific(s, VEC_CLASSID, 2);
53   PetscCall(VecGetLocalSize(s, &n));
54   PetscCall(VecGetLocalSize(v, &i));
55   PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
56 #if defined(PETSC_HAVE_CUDA)
57   PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
58   PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
59   viscuda = (PetscBool)(viscuda && !v->boundtocpu);
60   siscuda = (PetscBool)(siscuda && !s->boundtocpu);
61   if (viscuda && siscuda) {
62     PetscCall(VecCUDAGetArrayRead(v, &av));
63     PetscCall(VecCUDAGetArrayWrite(s, &as));
64     SignVector_Functor sign_vector(av, as);
65     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
66     PetscCall(VecCUDARestoreArrayWrite(s, &as));
67     PetscCall(VecCUDARestoreArrayRead(v, &av));
68   } else
69 #endif
70   {
71     PetscCall(VecGetArrayRead(v, &av));
72     PetscCall(VecGetArrayWrite(s, &as));
73     for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
74     PetscCall(VecRestoreArrayWrite(s, &as));
75     PetscCall(VecRestoreArrayRead(v, &av));
76   }
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
80 #if defined(PETSC_HAVE_CUDA)
81 struct StandardBasis_Functor {
82   PetscScalar *v;
83   PetscInt     j;
84 
StandardBasis_FunctorStandardBasis_Functor85   StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
operator ()StandardBasis_Functor86   __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
87 };
88 #endif
89 
VecSetDelta(Vec x,PetscInt i)90 PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
91 {
92 #if defined(PETSC_HAVE_CUDA)
93   PetscBool iscuda;
94 #endif
95   PetscInt st, en;
96 
97   PetscFunctionBegin;
98   PetscCall(VecGetOwnershipRange(x, &st, &en));
99 #if defined(PETSC_HAVE_CUDA)
100   PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
101   iscuda = (PetscBool)(iscuda && !x->boundtocpu);
102   if (iscuda) {
103     PetscScalar *ax;
104     PetscCall(VecCUDAGetArrayWrite(x, &ax));
105     StandardBasis_Functor delta(ax, i - st);
106     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
107     PetscCall(VecCUDARestoreArrayWrite(x, &ax));
108   } else
109 #endif
110   {
111     PetscCall(VecSet(x, 0.));
112     if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
113     PetscCall(VecAssemblyBegin(x));
114     PetscCall(VecAssemblyEnd(x));
115   }
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 /* these are approximate norms */
120 /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
121    NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
MatApproximateNorm_Private(Mat A,NormType normtype,PetscInt normsamples,PetscReal * n)122 PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
123 {
124   Vec         x, y, w, z;
125   PetscReal   normz, adot;
126   PetscScalar dot;
127   PetscInt    i, j, N, jold = -1;
128   PetscBool   boundtocpu = PETSC_TRUE;
129 
130   PetscFunctionBegin;
131 #if defined(PETSC_HAVE_DEVICE)
132   boundtocpu = A->boundtocpu;
133 #endif
134   switch (normtype) {
135   case NORM_INFINITY:
136   case NORM_1:
137     if (normsamples < 0) normsamples = 10; /* pure guess */
138     if (normtype == NORM_INFINITY) {
139       Mat B;
140       PetscCall(MatCreateTranspose(A, &B));
141       A = B;
142     } else {
143       PetscCall(PetscObjectReference((PetscObject)A));
144     }
145     PetscCall(MatCreateVecs(A, &x, &y));
146     PetscCall(MatCreateVecs(A, &z, &w));
147     PetscCall(VecBindToCPU(x, boundtocpu));
148     PetscCall(VecBindToCPU(y, boundtocpu));
149     PetscCall(VecBindToCPU(z, boundtocpu));
150     PetscCall(VecBindToCPU(w, boundtocpu));
151     PetscCall(VecGetSize(x, &N));
152     PetscCall(VecSet(x, 1. / N));
153     *n = 0.0;
154     for (i = 0; i < normsamples; i++) {
155       PetscCall(MatMult(A, x, y));
156       PetscCall(VecSign(y, w));
157       PetscCall(MatMultTranspose(A, w, z));
158       PetscCall(VecNorm(z, NORM_INFINITY, &normz));
159       PetscCall(VecDot(x, z, &dot));
160       adot = PetscAbsScalar(dot);
161       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
162       if (normz <= adot && i > 0) {
163         PetscCall(VecNorm(y, NORM_1, n));
164         break;
165       }
166       PetscCall(VecMax(z, &j, &normz));
167       if (j == jold) {
168         PetscCall(VecNorm(y, NORM_1, n));
169         PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
170         break;
171       }
172       jold = j;
173       PetscCall(VecSetDelta(x, j));
174     }
175     PetscCall(MatDestroy(&A));
176     PetscCall(VecDestroy(&x));
177     PetscCall(VecDestroy(&w));
178     PetscCall(VecDestroy(&y));
179     PetscCall(VecDestroy(&z));
180     break;
181   case NORM_2:
182     if (normsamples < 0) normsamples = 20; /* pure guess */
183     PetscCall(MatCreateVecs(A, &x, &y));
184     PetscCall(MatCreateVecs(A, &z, NULL));
185     PetscCall(VecBindToCPU(x, boundtocpu));
186     PetscCall(VecBindToCPU(y, boundtocpu));
187     PetscCall(VecBindToCPU(z, boundtocpu));
188     PetscCall(VecSetRandom(x, NULL));
189     PetscCall(VecNormalize(x, NULL));
190     *n = 0.0;
191     for (i = 0; i < normsamples; i++) {
192       PetscCall(MatMult(A, x, y));
193       PetscCall(VecNormalize(y, n));
194       PetscCall(MatMultTranspose(A, y, z));
195       PetscCall(VecNorm(z, NORM_2, &normz));
196       PetscCall(VecDot(x, z, &dot));
197       adot = PetscAbsScalar(dot);
198       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
199       if (normz <= adot) break;
200       if (i < normsamples - 1) {
201         Vec t;
202 
203         PetscCall(VecNormalize(z, NULL));
204         t = x;
205         x = z;
206         z = t;
207       }
208     }
209     PetscCall(VecDestroy(&x));
210     PetscCall(VecDestroy(&y));
211     PetscCall(VecDestroy(&z));
212     break;
213   default:
214     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
215   }
216   PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219