1 #include <petsc/private/matimpl.h>
2 #include <petsc/private/vecimpl.h>
3 #include <petscsf.h>
4 #if defined(PETSC_HAVE_CUDA)
5 #include <thrust/for_each.h>
6 #include <thrust/device_vector.h>
7 #include <thrust/execution_policy.h>
8 #endif
9
MatDenseGetH2OpusStridedSF(Mat A,PetscSF h2sf,PetscSF * osf)10 PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
11 {
12 PetscSF asf;
13
14 PetscFunctionBegin;
15 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
16 PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2);
17 PetscAssertPointer(osf, 3);
18 PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
19 if (!asf) {
20 PetscInt lda;
21
22 PetscCall(MatDenseGetLDA(A, &lda));
23 PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
24 PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
25 PetscCall(PetscObjectDereference((PetscObject)asf));
26 }
27 *osf = asf;
28 PetscFunctionReturn(PETSC_SUCCESS);
29 }
30
31 #if defined(PETSC_HAVE_CUDA)
32 struct SignVector_Functor {
33 const PetscScalar *v;
34 PetscScalar *s;
SignVector_FunctorSignVector_Functor35 SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { }
36
operator ()SignVector_Functor37 __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); }
38 };
39 #endif
40
VecSign(Vec v,Vec s)41 PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
42 {
43 const PetscScalar *av;
44 PetscScalar *as;
45 PetscInt i, n;
46 #if defined(PETSC_HAVE_CUDA)
47 PetscBool viscuda, siscuda;
48 #endif
49
50 PetscFunctionBegin;
51 PetscValidHeaderSpecific(v, VEC_CLASSID, 1);
52 PetscValidHeaderSpecific(s, VEC_CLASSID, 2);
53 PetscCall(VecGetLocalSize(s, &n));
54 PetscCall(VecGetLocalSize(v, &i));
55 PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n);
56 #if defined(PETSC_HAVE_CUDA)
57 PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, ""));
58 PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, ""));
59 viscuda = (PetscBool)(viscuda && !v->boundtocpu);
60 siscuda = (PetscBool)(siscuda && !s->boundtocpu);
61 if (viscuda && siscuda) {
62 PetscCall(VecCUDAGetArrayRead(v, &av));
63 PetscCall(VecCUDAGetArrayWrite(s, &as));
64 SignVector_Functor sign_vector(av, as);
65 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector);
66 PetscCall(VecCUDARestoreArrayWrite(s, &as));
67 PetscCall(VecCUDARestoreArrayRead(v, &av));
68 } else
69 #endif
70 {
71 PetscCall(VecGetArrayRead(v, &av));
72 PetscCall(VecGetArrayWrite(s, &as));
73 for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
74 PetscCall(VecRestoreArrayWrite(s, &as));
75 PetscCall(VecRestoreArrayRead(v, &av));
76 }
77 PetscFunctionReturn(PETSC_SUCCESS);
78 }
79
80 #if defined(PETSC_HAVE_CUDA)
81 struct StandardBasis_Functor {
82 PetscScalar *v;
83 PetscInt j;
84
StandardBasis_FunctorStandardBasis_Functor85 StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
operator ()StandardBasis_Functor86 __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
87 };
88 #endif
89
VecSetDelta(Vec x,PetscInt i)90 PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
91 {
92 #if defined(PETSC_HAVE_CUDA)
93 PetscBool iscuda;
94 #endif
95 PetscInt st, en;
96
97 PetscFunctionBegin;
98 PetscCall(VecGetOwnershipRange(x, &st, &en));
99 #if defined(PETSC_HAVE_CUDA)
100 PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
101 iscuda = (PetscBool)(iscuda && !x->boundtocpu);
102 if (iscuda) {
103 PetscScalar *ax;
104 PetscCall(VecCUDAGetArrayWrite(x, &ax));
105 StandardBasis_Functor delta(ax, i - st);
106 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
107 PetscCall(VecCUDARestoreArrayWrite(x, &ax));
108 } else
109 #endif
110 {
111 PetscCall(VecSet(x, 0.));
112 if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
113 PetscCall(VecAssemblyBegin(x));
114 PetscCall(VecAssemblyEnd(x));
115 }
116 PetscFunctionReturn(PETSC_SUCCESS);
117 }
118
119 /* these are approximate norms */
120 /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
121 NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
MatApproximateNorm_Private(Mat A,NormType normtype,PetscInt normsamples,PetscReal * n)122 PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
123 {
124 Vec x, y, w, z;
125 PetscReal normz, adot;
126 PetscScalar dot;
127 PetscInt i, j, N, jold = -1;
128 PetscBool boundtocpu = PETSC_TRUE;
129
130 PetscFunctionBegin;
131 #if defined(PETSC_HAVE_DEVICE)
132 boundtocpu = A->boundtocpu;
133 #endif
134 switch (normtype) {
135 case NORM_INFINITY:
136 case NORM_1:
137 if (normsamples < 0) normsamples = 10; /* pure guess */
138 if (normtype == NORM_INFINITY) {
139 Mat B;
140 PetscCall(MatCreateTranspose(A, &B));
141 A = B;
142 } else {
143 PetscCall(PetscObjectReference((PetscObject)A));
144 }
145 PetscCall(MatCreateVecs(A, &x, &y));
146 PetscCall(MatCreateVecs(A, &z, &w));
147 PetscCall(VecBindToCPU(x, boundtocpu));
148 PetscCall(VecBindToCPU(y, boundtocpu));
149 PetscCall(VecBindToCPU(z, boundtocpu));
150 PetscCall(VecBindToCPU(w, boundtocpu));
151 PetscCall(VecGetSize(x, &N));
152 PetscCall(VecSet(x, 1. / N));
153 *n = 0.0;
154 for (i = 0; i < normsamples; i++) {
155 PetscCall(MatMult(A, x, y));
156 PetscCall(VecSign(y, w));
157 PetscCall(MatMultTranspose(A, w, z));
158 PetscCall(VecNorm(z, NORM_INFINITY, &normz));
159 PetscCall(VecDot(x, z, &dot));
160 adot = PetscAbsScalar(dot);
161 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
162 if (normz <= adot && i > 0) {
163 PetscCall(VecNorm(y, NORM_1, n));
164 break;
165 }
166 PetscCall(VecMax(z, &j, &normz));
167 if (j == jold) {
168 PetscCall(VecNorm(y, NORM_1, n));
169 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
170 break;
171 }
172 jold = j;
173 PetscCall(VecSetDelta(x, j));
174 }
175 PetscCall(MatDestroy(&A));
176 PetscCall(VecDestroy(&x));
177 PetscCall(VecDestroy(&w));
178 PetscCall(VecDestroy(&y));
179 PetscCall(VecDestroy(&z));
180 break;
181 case NORM_2:
182 if (normsamples < 0) normsamples = 20; /* pure guess */
183 PetscCall(MatCreateVecs(A, &x, &y));
184 PetscCall(MatCreateVecs(A, &z, NULL));
185 PetscCall(VecBindToCPU(x, boundtocpu));
186 PetscCall(VecBindToCPU(y, boundtocpu));
187 PetscCall(VecBindToCPU(z, boundtocpu));
188 PetscCall(VecSetRandom(x, NULL));
189 PetscCall(VecNormalize(x, NULL));
190 *n = 0.0;
191 for (i = 0; i < normsamples; i++) {
192 PetscCall(MatMult(A, x, y));
193 PetscCall(VecNormalize(y, n));
194 PetscCall(MatMultTranspose(A, y, z));
195 PetscCall(VecNorm(z, NORM_2, &normz));
196 PetscCall(VecDot(x, z, &dot));
197 adot = PetscAbsScalar(dot);
198 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
199 if (normz <= adot) break;
200 if (i < normsamples - 1) {
201 Vec t;
202
203 PetscCall(VecNormalize(z, NULL));
204 t = x;
205 x = z;
206 z = t;
207 }
208 }
209 PetscCall(VecDestroy(&x));
210 PetscCall(VecDestroy(&y));
211 PetscCall(VecDestroy(&z));
212 break;
213 default:
214 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
215 }
216 PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
217 PetscFunctionReturn(PETSC_SUCCESS);
218 }
219