xref: /petsc/src/mat/impls/h2opus/cuda/math2opusutils.cu (revision e9c2de13f1527bfd8ec0d39a07e637e43d1d9c6b)
1f17f7ee2SSatish Balay #include <petsc/private/matimpl.h>
2f17f7ee2SSatish Balay #include <petsc/private/vecimpl.h>
3f17f7ee2SSatish Balay #include <petscsf.h>
4f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
5f17f7ee2SSatish Balay   #include <thrust/for_each.h>
6f17f7ee2SSatish Balay   #include <thrust/device_vector.h>
7f17f7ee2SSatish Balay   #include <thrust/execution_policy.h>
8f17f7ee2SSatish Balay #endif
9f17f7ee2SSatish Balay 
MatDenseGetH2OpusStridedSF(Mat A,PetscSF h2sf,PetscSF * osf)10*0dd791a8SStefano Zampini PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
11d71ae5a4SJacob Faibussowitsch {
12f17f7ee2SSatish Balay   PetscSF asf;
13f17f7ee2SSatish Balay 
14f17f7ee2SSatish Balay   PetscFunctionBegin;
15f17f7ee2SSatish Balay   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
16f17f7ee2SSatish Balay   PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2);
174f572ea9SToby Isaac   PetscAssertPointer(osf, 3);
18*0dd791a8SStefano Zampini   PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
19f17f7ee2SSatish Balay   if (!asf) {
20f17f7ee2SSatish Balay     PetscInt lda;
21f17f7ee2SSatish Balay 
22f17f7ee2SSatish Balay     PetscCall(MatDenseGetLDA(A, &lda));
23*0dd791a8SStefano Zampini     PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
24*0dd791a8SStefano Zampini     PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
25f17f7ee2SSatish Balay     PetscCall(PetscObjectDereference((PetscObject)asf));
26f17f7ee2SSatish Balay   }
27f17f7ee2SSatish Balay   *osf = asf;
283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29f17f7ee2SSatish Balay }
30f17f7ee2SSatish Balay 
31f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
329371c9d4SSatish Balay struct SignVector_Functor {
33f17f7ee2SSatish Balay   const PetscScalar *v;
34f17f7ee2SSatish Balay   PetscScalar       *s;
SignVector_FunctorSignVector_Functor35f17f7ee2SSatish Balay   SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }
36f17f7ee2SSatish Balay 
operator ()SignVector_Functor379371c9d4SSatish Balay   __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
38f17f7ee2SSatish Balay };
39f17f7ee2SSatish Balay #endif
40f17f7ee2SSatish Balay 
VecSign(Vec v,Vec s)41d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
42d71ae5a4SJacob Faibussowitsch {
43f17f7ee2SSatish Balay   const PetscScalar *av;
44f17f7ee2SSatish Balay   PetscScalar       *as;
45f17f7ee2SSatish Balay   PetscInt           i, n;
46f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
47f17f7ee2SSatish Balay   PetscBool viscuda, siscuda;
48f17f7ee2SSatish Balay #endif
49f17f7ee2SSatish Balay 
50f17f7ee2SSatish Balay   PetscFunctionBegin;
51f17f7ee2SSatish Balay   PetscValidHeaderSpecific(v, VEC_CLASSID, 1);
52f17f7ee2SSatish Balay   PetscValidHeaderSpecific(s, VEC_CLASSID, 2);
53f17f7ee2SSatish Balay   PetscCall(VecGetLocalSize(s, &n));
54f17f7ee2SSatish Balay   PetscCall(VecGetLocalSize(v, &i));
55f17f7ee2SSatish Balay   PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
56f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
57f17f7ee2SSatish Balay   PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
58f17f7ee2SSatish Balay   PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
59f17f7ee2SSatish Balay   viscuda = (PetscBool)(viscuda && !v->boundtocpu);
60f17f7ee2SSatish Balay   siscuda = (PetscBool)(siscuda && !s->boundtocpu);
61f17f7ee2SSatish Balay   if (viscuda && siscuda) {
62f17f7ee2SSatish Balay     PetscCall(VecCUDAGetArrayRead(v, &av));
63f17f7ee2SSatish Balay     PetscCall(VecCUDAGetArrayWrite(s, &as));
64f17f7ee2SSatish Balay     SignVector_Functor sign_vector(av, as);
659371c9d4SSatish Balay     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
66f17f7ee2SSatish Balay     PetscCall(VecCUDARestoreArrayWrite(s, &as));
67f17f7ee2SSatish Balay     PetscCall(VecCUDARestoreArrayRead(v, &av));
68f17f7ee2SSatish Balay   } else
69f17f7ee2SSatish Balay #endif
70f17f7ee2SSatish Balay   {
71f17f7ee2SSatish Balay     PetscCall(VecGetArrayRead(v, &av));
72f17f7ee2SSatish Balay     PetscCall(VecGetArrayWrite(s, &as));
73f17f7ee2SSatish Balay     for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
74f17f7ee2SSatish Balay     PetscCall(VecRestoreArrayWrite(s, &as));
75f17f7ee2SSatish Balay     PetscCall(VecRestoreArrayRead(v, &av));
76f17f7ee2SSatish Balay   }
773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
78f17f7ee2SSatish Balay }
79f17f7ee2SSatish Balay 
80f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
819371c9d4SSatish Balay struct StandardBasis_Functor {
82f17f7ee2SSatish Balay   PetscScalar *v;
83f17f7ee2SSatish Balay   PetscInt     j;
84f17f7ee2SSatish Balay 
StandardBasis_FunctorStandardBasis_Functor85f17f7ee2SSatish Balay   StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
operator ()StandardBasis_Functor869371c9d4SSatish Balay   __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
87f17f7ee2SSatish Balay };
88f17f7ee2SSatish Balay #endif
89f17f7ee2SSatish Balay 
VecSetDelta(Vec x,PetscInt i)90d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
91d71ae5a4SJacob Faibussowitsch {
92f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
93f17f7ee2SSatish Balay   PetscBool iscuda;
94f17f7ee2SSatish Balay #endif
95f17f7ee2SSatish Balay   PetscInt st, en;
96f17f7ee2SSatish Balay 
97f17f7ee2SSatish Balay   PetscFunctionBegin;
98f17f7ee2SSatish Balay   PetscCall(VecGetOwnershipRange(x, &st, &en));
99f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
100f17f7ee2SSatish Balay   PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
101f17f7ee2SSatish Balay   iscuda = (PetscBool)(iscuda && !x->boundtocpu);
102f17f7ee2SSatish Balay   if (iscuda) {
103f17f7ee2SSatish Balay     PetscScalar *ax;
104f17f7ee2SSatish Balay     PetscCall(VecCUDAGetArrayWrite(x, &ax));
105f17f7ee2SSatish Balay     StandardBasis_Functor delta(ax, i - st);
1069371c9d4SSatish Balay     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
107f17f7ee2SSatish Balay     PetscCall(VecCUDARestoreArrayWrite(x, &ax));
108f17f7ee2SSatish Balay   } else
109f17f7ee2SSatish Balay #endif
110f17f7ee2SSatish Balay   {
111f17f7ee2SSatish Balay     PetscCall(VecSet(x, 0.));
11248a46eb9SPierre Jolivet     if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
113f17f7ee2SSatish Balay     PetscCall(VecAssemblyBegin(x));
114f17f7ee2SSatish Balay     PetscCall(VecAssemblyEnd(x));
115f17f7ee2SSatish Balay   }
1163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
117f17f7ee2SSatish Balay }
118f17f7ee2SSatish Balay 
119f17f7ee2SSatish Balay /* these are approximate norms */
120f17f7ee2SSatish Balay /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
121f17f7ee2SSatish Balay    NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
MatApproximateNorm_Private(Mat A,NormType normtype,PetscInt normsamples,PetscReal * n)122d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
123d71ae5a4SJacob Faibussowitsch {
124f17f7ee2SSatish Balay   Vec         x, y, w, z;
125f17f7ee2SSatish Balay   PetscReal   normz, adot;
126f17f7ee2SSatish Balay   PetscScalar dot;
127f17f7ee2SSatish Balay   PetscInt    i, j, N, jold = -1;
128f17f7ee2SSatish Balay   PetscBool   boundtocpu = PETSC_TRUE;
129f17f7ee2SSatish Balay 
130f17f7ee2SSatish Balay   PetscFunctionBegin;
131f17f7ee2SSatish Balay #if defined(PETSC_HAVE_DEVICE)
132f17f7ee2SSatish Balay   boundtocpu = A->boundtocpu;
133f17f7ee2SSatish Balay #endif
134f17f7ee2SSatish Balay   switch (normtype) {
135f17f7ee2SSatish Balay   case NORM_INFINITY:
136f17f7ee2SSatish Balay   case NORM_1:
137f17f7ee2SSatish Balay     if (normsamples < 0) normsamples = 10; /* pure guess */
138f17f7ee2SSatish Balay     if (normtype == NORM_INFINITY) {
139f17f7ee2SSatish Balay       Mat B;
140f17f7ee2SSatish Balay       PetscCall(MatCreateTranspose(A, &B));
141f17f7ee2SSatish Balay       A = B;
142f17f7ee2SSatish Balay     } else {
143f17f7ee2SSatish Balay       PetscCall(PetscObjectReference((PetscObject)A));
144f17f7ee2SSatish Balay     }
145f17f7ee2SSatish Balay     PetscCall(MatCreateVecs(A, &x, &y));
146f17f7ee2SSatish Balay     PetscCall(MatCreateVecs(A, &z, &w));
147f17f7ee2SSatish Balay     PetscCall(VecBindToCPU(x, boundtocpu));
148f17f7ee2SSatish Balay     PetscCall(VecBindToCPU(y, boundtocpu));
149f17f7ee2SSatish Balay     PetscCall(VecBindToCPU(z, boundtocpu));
150f17f7ee2SSatish Balay     PetscCall(VecBindToCPU(w, boundtocpu));
151f17f7ee2SSatish Balay     PetscCall(VecGetSize(x, &N));
152f17f7ee2SSatish Balay     PetscCall(VecSet(x, 1. / N));
153f17f7ee2SSatish Balay     *n = 0.0;
154f17f7ee2SSatish Balay     for (i = 0; i < normsamples; i++) {
155f17f7ee2SSatish Balay       PetscCall(MatMult(A, x, y));
156f17f7ee2SSatish Balay       PetscCall(VecSign(y, w));
157f17f7ee2SSatish Balay       PetscCall(MatMultTranspose(A, w, z));
158f17f7ee2SSatish Balay       PetscCall(VecNorm(z, NORM_INFINITY, &normz));
159f17f7ee2SSatish Balay       PetscCall(VecDot(x, z, &dot));
160f17f7ee2SSatish Balay       adot = PetscAbsScalar(dot);
161f17f7ee2SSatish Balay       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
162f17f7ee2SSatish Balay       if (normz <= adot && i > 0) {
163f17f7ee2SSatish Balay         PetscCall(VecNorm(y, NORM_1, n));
164f17f7ee2SSatish Balay         break;
165f17f7ee2SSatish Balay       }
166f17f7ee2SSatish Balay       PetscCall(VecMax(z, &j, &normz));
167f17f7ee2SSatish Balay       if (j == jold) {
168f17f7ee2SSatish Balay         PetscCall(VecNorm(y, NORM_1, n));
169f17f7ee2SSatish Balay         PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
170f17f7ee2SSatish Balay         break;
171f17f7ee2SSatish Balay       }
172f17f7ee2SSatish Balay       jold = j;
173f17f7ee2SSatish Balay       PetscCall(VecSetDelta(x, j));
174f17f7ee2SSatish Balay     }
175f17f7ee2SSatish Balay     PetscCall(MatDestroy(&A));
176f17f7ee2SSatish Balay     PetscCall(VecDestroy(&x));
177f17f7ee2SSatish Balay     PetscCall(VecDestroy(&w));
178f17f7ee2SSatish Balay     PetscCall(VecDestroy(&y));
179f17f7ee2SSatish Balay     PetscCall(VecDestroy(&z));
180f17f7ee2SSatish Balay     break;
181f17f7ee2SSatish Balay   case NORM_2:
182f17f7ee2SSatish Balay     if (normsamples < 0) normsamples = 20; /* pure guess */
183f17f7ee2SSatish Balay     PetscCall(MatCreateVecs(A, &x, &y));
184f17f7ee2SSatish Balay     PetscCall(MatCreateVecs(A, &z, NULL));
185f17f7ee2SSatish Balay     PetscCall(VecBindToCPU(x, boundtocpu));
186f17f7ee2SSatish Balay     PetscCall(VecBindToCPU(y, boundtocpu));
187f17f7ee2SSatish Balay     PetscCall(VecBindToCPU(z, boundtocpu));
188f17f7ee2SSatish Balay     PetscCall(VecSetRandom(x, NULL));
189f17f7ee2SSatish Balay     PetscCall(VecNormalize(x, NULL));
190f17f7ee2SSatish Balay     *n = 0.0;
191f17f7ee2SSatish Balay     for (i = 0; i < normsamples; i++) {
192f17f7ee2SSatish Balay       PetscCall(MatMult(A, x, y));
193f17f7ee2SSatish Balay       PetscCall(VecNormalize(y, n));
194f17f7ee2SSatish Balay       PetscCall(MatMultTranspose(A, y, z));
195f17f7ee2SSatish Balay       PetscCall(VecNorm(z, NORM_2, &normz));
196f17f7ee2SSatish Balay       PetscCall(VecDot(x, z, &dot));
197f17f7ee2SSatish Balay       adot = PetscAbsScalar(dot);
198f17f7ee2SSatish Balay       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
199f17f7ee2SSatish Balay       if (normz <= adot) break;
200f17f7ee2SSatish Balay       if (i < normsamples - 1) {
201f17f7ee2SSatish Balay         Vec t;
202f17f7ee2SSatish Balay 
203f17f7ee2SSatish Balay         PetscCall(VecNormalize(z, NULL));
204f17f7ee2SSatish Balay         t = x;
205f17f7ee2SSatish Balay         x = z;
206f17f7ee2SSatish Balay         z = t;
207f17f7ee2SSatish Balay       }
208f17f7ee2SSatish Balay     }
209f17f7ee2SSatish Balay     PetscCall(VecDestroy(&x));
210f17f7ee2SSatish Balay     PetscCall(VecDestroy(&y));
211f17f7ee2SSatish Balay     PetscCall(VecDestroy(&z));
212f17f7ee2SSatish Balay     break;
213d71ae5a4SJacob Faibussowitsch   default:
214d71ae5a4SJacob Faibussowitsch     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
215f17f7ee2SSatish Balay   }
216f17f7ee2SSatish Balay   PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
2173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
218f17f7ee2SSatish Balay }
219