1 #include <petsc/private/matimpl.h> 2 #include <petsc/private/vecimpl.h> 3 #include <petscsf.h> 4 #if defined(PETSC_HAVE_CUDA) 5 #include <thrust/for_each.h> 6 #include <thrust/device_vector.h> 7 #include <thrust/execution_policy.h> 8 #endif 9 10 PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf) 11 { 12 PetscSF rankssf; 13 const PetscSFNode *iremote; 14 PetscSFNode *viremote, *rremotes; 15 const PetscInt *ilocal; 16 PetscInt *vilocal = NULL, *ldrs; 17 const PetscMPIInt *ranks; 18 PetscMPIInt *sranks; 19 PetscInt nranks, nr, nl, vnr, vnl, i, v, j, maxl; 20 MPI_Comm comm; 21 22 PetscFunctionBegin; 23 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 24 PetscValidLogicalCollectiveInt(sf, nv, 2); 25 PetscValidPointer(vsf, 5); 26 if (nv == 1) { 27 PetscCall(PetscObjectReference((PetscObject)sf)); 28 *vsf = sf; 29 PetscFunctionReturn(PETSC_SUCCESS); 30 } 31 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 32 PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote)); 33 PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl)); 34 maxl += 1; 35 if (ldl == PETSC_DECIDE) ldl = maxl; 36 if (ldr == PETSC_DECIDE) ldr = nr; 37 PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldr, nr); 38 PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldl, maxl); 39 vnr = nr * nv; 40 vnl = nl * nv; 41 PetscCall(PetscMalloc1(vnl, &viremote)); 42 if (ilocal) PetscCall(PetscMalloc1(vnl, &vilocal)); 43 44 /* TODO: Should this special SF be available, e.g. 45 PetscSFGetRanksSF or similar? */ 46 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 47 PetscCall(PetscMalloc1(nranks, &sranks)); 48 PetscCall(PetscArraycpy(sranks, ranks, nranks)); 49 PetscCall(PetscSortMPIInt(nranks, sranks)); 50 PetscCall(PetscMalloc1(nranks, &rremotes)); 51 for (i = 0; i < nranks; i++) { 52 rremotes[i].rank = sranks[i]; 53 rremotes[i].index = 0; 54 } 55 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &rankssf)); 56 PetscCall(PetscSFSetGraph(rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 57 PetscCall(PetscMalloc1(nranks, &ldrs)); 58 PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 59 PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 60 PetscCall(PetscSFDestroy(&rankssf)); 61 62 j = -1; 63 for (i = 0; i < nl; i++) { 64 const PetscInt r = iremote[i].rank; 65 const PetscInt ii = iremote[i].index; 66 67 if (j < 0 || sranks[j] != r) PetscCall(PetscFindMPIInt(r, nranks, sranks, &j)); 68 PetscCheck(j >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r); 69 for (v = 0; v < nv; v++) { 70 viremote[v * nl + i].rank = r; 71 viremote[v * nl + i].index = v * ldrs[j] + ii; 72 if (ilocal) vilocal[v * nl + i] = v * ldl + ilocal[i]; 73 } 74 } 75 PetscCall(PetscFree(sranks)); 76 PetscCall(PetscFree(ldrs)); 77 PetscCall(PetscSFCreate(comm, vsf)); 78 PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf) 83 { 84 PetscSF asf; 85 86 PetscFunctionBegin; 87 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 88 PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2); 89 PetscValidPointer(osf, 3); 90 PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_vectorsf", (PetscObject *)&asf)); 91 if (!asf) { 92 PetscInt lda; 93 94 PetscCall(MatDenseGetLDA(A, &lda)); 95 PetscCall(PetscSFGetVectorSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf)); 96 PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_vectorsf", (PetscObject)asf)); 97 PetscCall(PetscObjectDereference((PetscObject)asf)); 98 } 99 *osf = asf; 100 PetscFunctionReturn(PETSC_SUCCESS); 101 } 102 103 #if defined(PETSC_HAVE_CUDA) 104 struct SignVector_Functor { 105 const PetscScalar *v; 106 PetscScalar *s; 107 SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { } 108 109 __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); } 110 }; 111 #endif 112 113 PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s) 114 { 115 const PetscScalar *av; 116 PetscScalar *as; 117 PetscInt i, n; 118 #if defined(PETSC_HAVE_CUDA) 119 PetscBool viscuda, siscuda; 120 #endif 121 122 PetscFunctionBegin; 123 PetscValidHeaderSpecific(v, VEC_CLASSID, 1); 124 PetscValidHeaderSpecific(s, VEC_CLASSID, 2); 125 PetscCall(VecGetLocalSize(s, &n)); 126 PetscCall(VecGetLocalSize(v, &i)); 127 PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n); 128 #if defined(PETSC_HAVE_CUDA) 129 PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, "")); 130 PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, "")); 131 viscuda = (PetscBool)(viscuda && !v->boundtocpu); 132 siscuda = (PetscBool)(siscuda && !s->boundtocpu); 133 if (viscuda && siscuda) { 134 PetscCall(VecCUDAGetArrayRead(v, &av)); 135 PetscCall(VecCUDAGetArrayWrite(s, &as)); 136 SignVector_Functor sign_vector(av, as); 137 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector); 138 PetscCall(VecCUDARestoreArrayWrite(s, &as)); 139 PetscCall(VecCUDARestoreArrayRead(v, &av)); 140 } else 141 #endif 142 { 143 PetscCall(VecGetArrayRead(v, &av)); 144 PetscCall(VecGetArrayWrite(s, &as)); 145 for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.; 146 PetscCall(VecRestoreArrayWrite(s, &as)); 147 PetscCall(VecRestoreArrayRead(v, &av)); 148 } 149 PetscFunctionReturn(PETSC_SUCCESS); 150 } 151 152 #if defined(PETSC_HAVE_CUDA) 153 struct StandardBasis_Functor { 154 PetscScalar *v; 155 PetscInt j; 156 157 StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { } 158 __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); } 159 }; 160 #endif 161 162 PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i) 163 { 164 #if defined(PETSC_HAVE_CUDA) 165 PetscBool iscuda; 166 #endif 167 PetscInt st, en; 168 169 PetscFunctionBegin; 170 PetscCall(VecGetOwnershipRange(x, &st, &en)); 171 #if defined(PETSC_HAVE_CUDA) 172 PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, "")); 173 iscuda = (PetscBool)(iscuda && !x->boundtocpu); 174 if (iscuda) { 175 PetscScalar *ax; 176 PetscCall(VecCUDAGetArrayWrite(x, &ax)); 177 StandardBasis_Functor delta(ax, i - st); 178 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta); 179 PetscCall(VecCUDARestoreArrayWrite(x, &ax)); 180 } else 181 #endif 182 { 183 PetscCall(VecSet(x, 0.)); 184 if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES)); 185 PetscCall(VecAssemblyBegin(x)); 186 PetscCall(VecAssemblyEnd(x)); 187 } 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 /* these are approximate norms */ 192 /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham 193 NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */ 194 PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n) 195 { 196 Vec x, y, w, z; 197 PetscReal normz, adot; 198 PetscScalar dot; 199 PetscInt i, j, N, jold = -1; 200 PetscBool boundtocpu = PETSC_TRUE; 201 202 PetscFunctionBegin; 203 #if defined(PETSC_HAVE_DEVICE) 204 boundtocpu = A->boundtocpu; 205 #endif 206 switch (normtype) { 207 case NORM_INFINITY: 208 case NORM_1: 209 if (normsamples < 0) normsamples = 10; /* pure guess */ 210 if (normtype == NORM_INFINITY) { 211 Mat B; 212 PetscCall(MatCreateTranspose(A, &B)); 213 A = B; 214 } else { 215 PetscCall(PetscObjectReference((PetscObject)A)); 216 } 217 PetscCall(MatCreateVecs(A, &x, &y)); 218 PetscCall(MatCreateVecs(A, &z, &w)); 219 PetscCall(VecBindToCPU(x, boundtocpu)); 220 PetscCall(VecBindToCPU(y, boundtocpu)); 221 PetscCall(VecBindToCPU(z, boundtocpu)); 222 PetscCall(VecBindToCPU(w, boundtocpu)); 223 PetscCall(VecGetSize(x, &N)); 224 PetscCall(VecSet(x, 1. / N)); 225 *n = 0.0; 226 for (i = 0; i < normsamples; i++) { 227 PetscCall(MatMult(A, x, y)); 228 PetscCall(VecSign(y, w)); 229 PetscCall(MatMultTranspose(A, w, z)); 230 PetscCall(VecNorm(z, NORM_INFINITY, &normz)); 231 PetscCall(VecDot(x, z, &dot)); 232 adot = PetscAbsScalar(dot); 233 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot)); 234 if (normz <= adot && i > 0) { 235 PetscCall(VecNorm(y, NORM_1, n)); 236 break; 237 } 238 PetscCall(VecMax(z, &j, &normz)); 239 if (j == jold) { 240 PetscCall(VecNorm(y, NORM_1, n)); 241 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i)); 242 break; 243 } 244 jold = j; 245 PetscCall(VecSetDelta(x, j)); 246 } 247 PetscCall(MatDestroy(&A)); 248 PetscCall(VecDestroy(&x)); 249 PetscCall(VecDestroy(&w)); 250 PetscCall(VecDestroy(&y)); 251 PetscCall(VecDestroy(&z)); 252 break; 253 case NORM_2: 254 if (normsamples < 0) normsamples = 20; /* pure guess */ 255 PetscCall(MatCreateVecs(A, &x, &y)); 256 PetscCall(MatCreateVecs(A, &z, NULL)); 257 PetscCall(VecBindToCPU(x, boundtocpu)); 258 PetscCall(VecBindToCPU(y, boundtocpu)); 259 PetscCall(VecBindToCPU(z, boundtocpu)); 260 PetscCall(VecSetRandom(x, NULL)); 261 PetscCall(VecNormalize(x, NULL)); 262 *n = 0.0; 263 for (i = 0; i < normsamples; i++) { 264 PetscCall(MatMult(A, x, y)); 265 PetscCall(VecNormalize(y, n)); 266 PetscCall(MatMultTranspose(A, y, z)); 267 PetscCall(VecNorm(z, NORM_2, &normz)); 268 PetscCall(VecDot(x, z, &dot)); 269 adot = PetscAbsScalar(dot); 270 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot)); 271 if (normz <= adot) break; 272 if (i < normsamples - 1) { 273 Vec t; 274 275 PetscCall(VecNormalize(z, NULL)); 276 t = x; 277 x = z; 278 z = t; 279 } 280 } 281 PetscCall(VecDestroy(&x)); 282 PetscCall(VecDestroy(&y)); 283 PetscCall(VecDestroy(&z)); 284 break; 285 default: 286 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]); 287 } 288 PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i)); 289 PetscFunctionReturn(PETSC_SUCCESS); 290 } 291