1f17f7ee2SSatish Balay #include <petsc/private/matimpl.h>
2f17f7ee2SSatish Balay #include <petsc/private/vecimpl.h>
3f17f7ee2SSatish Balay #include <petscsf.h>
4f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
5f17f7ee2SSatish Balay #include <thrust/for_each.h>
6f17f7ee2SSatish Balay #include <thrust/device_vector.h>
7f17f7ee2SSatish Balay #include <thrust/execution_policy.h>
8f17f7ee2SSatish Balay #endif
9f17f7ee2SSatish Balay
MatDenseGetH2OpusStridedSF(Mat A,PetscSF h2sf,PetscSF * osf)10*0dd791a8SStefano Zampini PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
11d71ae5a4SJacob Faibussowitsch {
12f17f7ee2SSatish Balay PetscSF asf;
13f17f7ee2SSatish Balay
14f17f7ee2SSatish Balay PetscFunctionBegin;
15f17f7ee2SSatish Balay PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
16f17f7ee2SSatish Balay PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2);
174f572ea9SToby Isaac PetscAssertPointer(osf, 3);
18*0dd791a8SStefano Zampini PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
19f17f7ee2SSatish Balay if (!asf) {
20f17f7ee2SSatish Balay PetscInt lda;
21f17f7ee2SSatish Balay
22f17f7ee2SSatish Balay PetscCall(MatDenseGetLDA(A, &lda));
23*0dd791a8SStefano Zampini PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
24*0dd791a8SStefano Zampini PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
25f17f7ee2SSatish Balay PetscCall(PetscObjectDereference((PetscObject)asf));
26f17f7ee2SSatish Balay }
27f17f7ee2SSatish Balay *osf = asf;
283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
29f17f7ee2SSatish Balay }
30f17f7ee2SSatish Balay
31f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
329371c9d4SSatish Balay struct SignVector_Functor {
33f17f7ee2SSatish Balay const PetscScalar *v;
34f17f7ee2SSatish Balay PetscScalar *s;
SignVector_FunctorSignVector_Functor35f17f7ee2SSatish Balay SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }
36f17f7ee2SSatish Balay
operator ()SignVector_Functor379371c9d4SSatish Balay __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
38f17f7ee2SSatish Balay };
39f17f7ee2SSatish Balay #endif
40f17f7ee2SSatish Balay
VecSign(Vec v,Vec s)41d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
42d71ae5a4SJacob Faibussowitsch {
43f17f7ee2SSatish Balay const PetscScalar *av;
44f17f7ee2SSatish Balay PetscScalar *as;
45f17f7ee2SSatish Balay PetscInt i, n;
46f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
47f17f7ee2SSatish Balay PetscBool viscuda, siscuda;
48f17f7ee2SSatish Balay #endif
49f17f7ee2SSatish Balay
50f17f7ee2SSatish Balay PetscFunctionBegin;
51f17f7ee2SSatish Balay PetscValidHeaderSpecific(v, VEC_CLASSID, 1);
52f17f7ee2SSatish Balay PetscValidHeaderSpecific(s, VEC_CLASSID, 2);
53f17f7ee2SSatish Balay PetscCall(VecGetLocalSize(s, &n));
54f17f7ee2SSatish Balay PetscCall(VecGetLocalSize(v, &i));
55f17f7ee2SSatish Balay PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
56f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
57f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
58f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
59f17f7ee2SSatish Balay viscuda = (PetscBool)(viscuda && !v->boundtocpu);
60f17f7ee2SSatish Balay siscuda = (PetscBool)(siscuda && !s->boundtocpu);
61f17f7ee2SSatish Balay if (viscuda && siscuda) {
62f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayRead(v, &av));
63f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayWrite(s, &as));
64f17f7ee2SSatish Balay SignVector_Functor sign_vector(av, as);
659371c9d4SSatish Balay thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
66f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayWrite(s, &as));
67f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayRead(v, &av));
68f17f7ee2SSatish Balay } else
69f17f7ee2SSatish Balay #endif
70f17f7ee2SSatish Balay {
71f17f7ee2SSatish Balay PetscCall(VecGetArrayRead(v, &av));
72f17f7ee2SSatish Balay PetscCall(VecGetArrayWrite(s, &as));
73f17f7ee2SSatish Balay for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
74f17f7ee2SSatish Balay PetscCall(VecRestoreArrayWrite(s, &as));
75f17f7ee2SSatish Balay PetscCall(VecRestoreArrayRead(v, &av));
76f17f7ee2SSatish Balay }
773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
78f17f7ee2SSatish Balay }
79f17f7ee2SSatish Balay
80f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
819371c9d4SSatish Balay struct StandardBasis_Functor {
82f17f7ee2SSatish Balay PetscScalar *v;
83f17f7ee2SSatish Balay PetscInt j;
84f17f7ee2SSatish Balay
StandardBasis_FunctorStandardBasis_Functor85f17f7ee2SSatish Balay StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
operator ()StandardBasis_Functor869371c9d4SSatish Balay __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
87f17f7ee2SSatish Balay };
88f17f7ee2SSatish Balay #endif
89f17f7ee2SSatish Balay
VecSetDelta(Vec x,PetscInt i)90d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
91d71ae5a4SJacob Faibussowitsch {
92f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
93f17f7ee2SSatish Balay PetscBool iscuda;
94f17f7ee2SSatish Balay #endif
95f17f7ee2SSatish Balay PetscInt st, en;
96f17f7ee2SSatish Balay
97f17f7ee2SSatish Balay PetscFunctionBegin;
98f17f7ee2SSatish Balay PetscCall(VecGetOwnershipRange(x, &st, &en));
99f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA)
100f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
101f17f7ee2SSatish Balay iscuda = (PetscBool)(iscuda && !x->boundtocpu);
102f17f7ee2SSatish Balay if (iscuda) {
103f17f7ee2SSatish Balay PetscScalar *ax;
104f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayWrite(x, &ax));
105f17f7ee2SSatish Balay StandardBasis_Functor delta(ax, i - st);
1069371c9d4SSatish Balay thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
107f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayWrite(x, &ax));
108f17f7ee2SSatish Balay } else
109f17f7ee2SSatish Balay #endif
110f17f7ee2SSatish Balay {
111f17f7ee2SSatish Balay PetscCall(VecSet(x, 0.));
11248a46eb9SPierre Jolivet if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
113f17f7ee2SSatish Balay PetscCall(VecAssemblyBegin(x));
114f17f7ee2SSatish Balay PetscCall(VecAssemblyEnd(x));
115f17f7ee2SSatish Balay }
1163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
117f17f7ee2SSatish Balay }
118f17f7ee2SSatish Balay
119f17f7ee2SSatish Balay /* these are approximate norms */
120f17f7ee2SSatish Balay /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
121f17f7ee2SSatish Balay NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
MatApproximateNorm_Private(Mat A,NormType normtype,PetscInt normsamples,PetscReal * n)122d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
123d71ae5a4SJacob Faibussowitsch {
124f17f7ee2SSatish Balay Vec x, y, w, z;
125f17f7ee2SSatish Balay PetscReal normz, adot;
126f17f7ee2SSatish Balay PetscScalar dot;
127f17f7ee2SSatish Balay PetscInt i, j, N, jold = -1;
128f17f7ee2SSatish Balay PetscBool boundtocpu = PETSC_TRUE;
129f17f7ee2SSatish Balay
130f17f7ee2SSatish Balay PetscFunctionBegin;
131f17f7ee2SSatish Balay #if defined(PETSC_HAVE_DEVICE)
132f17f7ee2SSatish Balay boundtocpu = A->boundtocpu;
133f17f7ee2SSatish Balay #endif
134f17f7ee2SSatish Balay switch (normtype) {
135f17f7ee2SSatish Balay case NORM_INFINITY:
136f17f7ee2SSatish Balay case NORM_1:
137f17f7ee2SSatish Balay if (normsamples < 0) normsamples = 10; /* pure guess */
138f17f7ee2SSatish Balay if (normtype == NORM_INFINITY) {
139f17f7ee2SSatish Balay Mat B;
140f17f7ee2SSatish Balay PetscCall(MatCreateTranspose(A, &B));
141f17f7ee2SSatish Balay A = B;
142f17f7ee2SSatish Balay } else {
143f17f7ee2SSatish Balay PetscCall(PetscObjectReference((PetscObject)A));
144f17f7ee2SSatish Balay }
145f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &x, &y));
146f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &z, &w));
147f17f7ee2SSatish Balay PetscCall(VecBindToCPU(x, boundtocpu));
148f17f7ee2SSatish Balay PetscCall(VecBindToCPU(y, boundtocpu));
149f17f7ee2SSatish Balay PetscCall(VecBindToCPU(z, boundtocpu));
150f17f7ee2SSatish Balay PetscCall(VecBindToCPU(w, boundtocpu));
151f17f7ee2SSatish Balay PetscCall(VecGetSize(x, &N));
152f17f7ee2SSatish Balay PetscCall(VecSet(x, 1. / N));
153f17f7ee2SSatish Balay *n = 0.0;
154f17f7ee2SSatish Balay for (i = 0; i < normsamples; i++) {
155f17f7ee2SSatish Balay PetscCall(MatMult(A, x, y));
156f17f7ee2SSatish Balay PetscCall(VecSign(y, w));
157f17f7ee2SSatish Balay PetscCall(MatMultTranspose(A, w, z));
158f17f7ee2SSatish Balay PetscCall(VecNorm(z, NORM_INFINITY, &normz));
159f17f7ee2SSatish Balay PetscCall(VecDot(x, z, &dot));
160f17f7ee2SSatish Balay adot = PetscAbsScalar(dot);
161f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
162f17f7ee2SSatish Balay if (normz <= adot && i > 0) {
163f17f7ee2SSatish Balay PetscCall(VecNorm(y, NORM_1, n));
164f17f7ee2SSatish Balay break;
165f17f7ee2SSatish Balay }
166f17f7ee2SSatish Balay PetscCall(VecMax(z, &j, &normz));
167f17f7ee2SSatish Balay if (j == jold) {
168f17f7ee2SSatish Balay PetscCall(VecNorm(y, NORM_1, n));
169f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
170f17f7ee2SSatish Balay break;
171f17f7ee2SSatish Balay }
172f17f7ee2SSatish Balay jold = j;
173f17f7ee2SSatish Balay PetscCall(VecSetDelta(x, j));
174f17f7ee2SSatish Balay }
175f17f7ee2SSatish Balay PetscCall(MatDestroy(&A));
176f17f7ee2SSatish Balay PetscCall(VecDestroy(&x));
177f17f7ee2SSatish Balay PetscCall(VecDestroy(&w));
178f17f7ee2SSatish Balay PetscCall(VecDestroy(&y));
179f17f7ee2SSatish Balay PetscCall(VecDestroy(&z));
180f17f7ee2SSatish Balay break;
181f17f7ee2SSatish Balay case NORM_2:
182f17f7ee2SSatish Balay if (normsamples < 0) normsamples = 20; /* pure guess */
183f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &x, &y));
184f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &z, NULL));
185f17f7ee2SSatish Balay PetscCall(VecBindToCPU(x, boundtocpu));
186f17f7ee2SSatish Balay PetscCall(VecBindToCPU(y, boundtocpu));
187f17f7ee2SSatish Balay PetscCall(VecBindToCPU(z, boundtocpu));
188f17f7ee2SSatish Balay PetscCall(VecSetRandom(x, NULL));
189f17f7ee2SSatish Balay PetscCall(VecNormalize(x, NULL));
190f17f7ee2SSatish Balay *n = 0.0;
191f17f7ee2SSatish Balay for (i = 0; i < normsamples; i++) {
192f17f7ee2SSatish Balay PetscCall(MatMult(A, x, y));
193f17f7ee2SSatish Balay PetscCall(VecNormalize(y, n));
194f17f7ee2SSatish Balay PetscCall(MatMultTranspose(A, y, z));
195f17f7ee2SSatish Balay PetscCall(VecNorm(z, NORM_2, &normz));
196f17f7ee2SSatish Balay PetscCall(VecDot(x, z, &dot));
197f17f7ee2SSatish Balay adot = PetscAbsScalar(dot);
198f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
199f17f7ee2SSatish Balay if (normz <= adot) break;
200f17f7ee2SSatish Balay if (i < normsamples - 1) {
201f17f7ee2SSatish Balay Vec t;
202f17f7ee2SSatish Balay
203f17f7ee2SSatish Balay PetscCall(VecNormalize(z, NULL));
204f17f7ee2SSatish Balay t = x;
205f17f7ee2SSatish Balay x = z;
206f17f7ee2SSatish Balay z = t;
207f17f7ee2SSatish Balay }
208f17f7ee2SSatish Balay }
209f17f7ee2SSatish Balay PetscCall(VecDestroy(&x));
210f17f7ee2SSatish Balay PetscCall(VecDestroy(&y));
211f17f7ee2SSatish Balay PetscCall(VecDestroy(&z));
212f17f7ee2SSatish Balay break;
213d71ae5a4SJacob Faibussowitsch default:
214d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
215f17f7ee2SSatish Balay }
216f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
2173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
218f17f7ee2SSatish Balay }
219