1 #include <petsc/private/matimpl.h> 2 #include <petsc/private/vecimpl.h> 3 #include <petscsf.h> 4 #if defined(PETSC_HAVE_CUDA) 5 #include <thrust/for_each.h> 6 #include <thrust/device_vector.h> 7 #include <thrust/execution_policy.h> 8 #endif 9 10 PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf) 11 { 12 PetscSF asf; 13 14 PetscFunctionBegin; 15 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 16 PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2); 17 PetscAssertPointer(osf, 3); 18 PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf)); 19 if (!asf) { 20 PetscInt lda; 21 22 PetscCall(MatDenseGetLDA(A, &lda)); 23 PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf)); 24 PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf)); 25 PetscCall(PetscObjectDereference((PetscObject)asf)); 26 } 27 *osf = asf; 28 PetscFunctionReturn(PETSC_SUCCESS); 29 } 30 31 #if defined(PETSC_HAVE_CUDA) 32 struct SignVector_Functor { 33 const PetscScalar *v; 34 PetscScalar *s; 35 SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { } 36 37 __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); } 38 }; 39 #endif 40 41 PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s) 42 { 43 const PetscScalar *av; 44 PetscScalar *as; 45 PetscInt i, n; 46 #if defined(PETSC_HAVE_CUDA) 47 PetscBool viscuda, siscuda; 48 #endif 49 50 PetscFunctionBegin; 51 PetscValidHeaderSpecific(v, VEC_CLASSID, 1); 52 PetscValidHeaderSpecific(s, VEC_CLASSID, 2); 53 PetscCall(VecGetLocalSize(s, &n)); 54 PetscCall(VecGetLocalSize(v, &i)); 55 PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n); 56 #if defined(PETSC_HAVE_CUDA) 57 PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, "")); 58 PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, "")); 59 viscuda = (PetscBool)(viscuda && !v->boundtocpu); 60 siscuda = (PetscBool)(siscuda && !s->boundtocpu); 61 if (viscuda && siscuda) { 62 PetscCall(VecCUDAGetArrayRead(v, &av)); 63 PetscCall(VecCUDAGetArrayWrite(s, &as)); 64 SignVector_Functor sign_vector(av, as); 65 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector); 66 PetscCall(VecCUDARestoreArrayWrite(s, &as)); 67 PetscCall(VecCUDARestoreArrayRead(v, &av)); 68 } else 69 #endif 70 { 71 PetscCall(VecGetArrayRead(v, &av)); 72 PetscCall(VecGetArrayWrite(s, &as)); 73 for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.; 74 PetscCall(VecRestoreArrayWrite(s, &as)); 75 PetscCall(VecRestoreArrayRead(v, &av)); 76 } 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 #if defined(PETSC_HAVE_CUDA) 81 struct StandardBasis_Functor { 82 PetscScalar *v; 83 PetscInt j; 84 85 StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { } 86 __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); } 87 }; 88 #endif 89 90 PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i) 91 { 92 #if defined(PETSC_HAVE_CUDA) 93 PetscBool iscuda; 94 #endif 95 PetscInt st, en; 96 97 PetscFunctionBegin; 98 PetscCall(VecGetOwnershipRange(x, &st, &en)); 99 #if defined(PETSC_HAVE_CUDA) 100 PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, "")); 101 iscuda = (PetscBool)(iscuda && !x->boundtocpu); 102 if (iscuda) { 103 PetscScalar *ax; 104 PetscCall(VecCUDAGetArrayWrite(x, &ax)); 105 StandardBasis_Functor delta(ax, i - st); 106 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta); 107 PetscCall(VecCUDARestoreArrayWrite(x, &ax)); 108 } else 109 #endif 110 { 111 PetscCall(VecSet(x, 0.)); 112 if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES)); 113 PetscCall(VecAssemblyBegin(x)); 114 PetscCall(VecAssemblyEnd(x)); 115 } 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 /* these are approximate norms */ 120 /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham 121 NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */ 122 PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n) 123 { 124 Vec x, y, w, z; 125 PetscReal normz, adot; 126 PetscScalar dot; 127 PetscInt i, j, N, jold = -1; 128 PetscBool boundtocpu = PETSC_TRUE; 129 130 PetscFunctionBegin; 131 #if defined(PETSC_HAVE_DEVICE) 132 boundtocpu = A->boundtocpu; 133 #endif 134 switch (normtype) { 135 case NORM_INFINITY: 136 case NORM_1: 137 if (normsamples < 0) normsamples = 10; /* pure guess */ 138 if (normtype == NORM_INFINITY) { 139 Mat B; 140 PetscCall(MatCreateTranspose(A, &B)); 141 A = B; 142 } else { 143 PetscCall(PetscObjectReference((PetscObject)A)); 144 } 145 PetscCall(MatCreateVecs(A, &x, &y)); 146 PetscCall(MatCreateVecs(A, &z, &w)); 147 PetscCall(VecBindToCPU(x, boundtocpu)); 148 PetscCall(VecBindToCPU(y, boundtocpu)); 149 PetscCall(VecBindToCPU(z, boundtocpu)); 150 PetscCall(VecBindToCPU(w, boundtocpu)); 151 PetscCall(VecGetSize(x, &N)); 152 PetscCall(VecSet(x, 1. / N)); 153 *n = 0.0; 154 for (i = 0; i < normsamples; i++) { 155 PetscCall(MatMult(A, x, y)); 156 PetscCall(VecSign(y, w)); 157 PetscCall(MatMultTranspose(A, w, z)); 158 PetscCall(VecNorm(z, NORM_INFINITY, &normz)); 159 PetscCall(VecDot(x, z, &dot)); 160 adot = PetscAbsScalar(dot); 161 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot)); 162 if (normz <= adot && i > 0) { 163 PetscCall(VecNorm(y, NORM_1, n)); 164 break; 165 } 166 PetscCall(VecMax(z, &j, &normz)); 167 if (j == jold) { 168 PetscCall(VecNorm(y, NORM_1, n)); 169 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i)); 170 break; 171 } 172 jold = j; 173 PetscCall(VecSetDelta(x, j)); 174 } 175 PetscCall(MatDestroy(&A)); 176 PetscCall(VecDestroy(&x)); 177 PetscCall(VecDestroy(&w)); 178 PetscCall(VecDestroy(&y)); 179 PetscCall(VecDestroy(&z)); 180 break; 181 case NORM_2: 182 if (normsamples < 0) normsamples = 20; /* pure guess */ 183 PetscCall(MatCreateVecs(A, &x, &y)); 184 PetscCall(MatCreateVecs(A, &z, NULL)); 185 PetscCall(VecBindToCPU(x, boundtocpu)); 186 PetscCall(VecBindToCPU(y, boundtocpu)); 187 PetscCall(VecBindToCPU(z, boundtocpu)); 188 PetscCall(VecSetRandom(x, NULL)); 189 PetscCall(VecNormalize(x, NULL)); 190 *n = 0.0; 191 for (i = 0; i < normsamples; i++) { 192 PetscCall(MatMult(A, x, y)); 193 PetscCall(VecNormalize(y, n)); 194 PetscCall(MatMultTranspose(A, y, z)); 195 PetscCall(VecNorm(z, NORM_2, &normz)); 196 PetscCall(VecDot(x, z, &dot)); 197 adot = PetscAbsScalar(dot); 198 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot)); 199 if (normz <= adot) break; 200 if (i < normsamples - 1) { 201 Vec t; 202 203 PetscCall(VecNormalize(z, NULL)); 204 t = x; 205 x = z; 206 z = t; 207 } 208 } 209 PetscCall(VecDestroy(&x)); 210 PetscCall(VecDestroy(&y)); 211 PetscCall(VecDestroy(&z)); 212 break; 213 default: 214 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]); 215 } 216 PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i)); 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219