1 #include <petsc/private/matimpl.h> 2 #include <petsc/private/vecimpl.h> 3 #include <petscsf.h> 4 #if defined(PETSC_HAVE_CUDA) 5 #include <thrust/for_each.h> 6 #include <thrust/device_vector.h> 7 #include <thrust/execution_policy.h> 8 #endif 9 10 PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf) { 11 PetscSF rankssf; 12 const PetscSFNode *iremote; 13 PetscSFNode *viremote, *rremotes; 14 const PetscInt *ilocal; 15 PetscInt *vilocal = NULL, *ldrs; 16 const PetscMPIInt *ranks; 17 PetscMPIInt *sranks; 18 PetscInt nranks, nr, nl, vnr, vnl, i, v, j, maxl; 19 MPI_Comm comm; 20 21 PetscFunctionBegin; 22 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 23 PetscValidLogicalCollectiveInt(sf, nv, 2); 24 PetscValidPointer(vsf, 5); 25 if (nv == 1) { 26 PetscCall(PetscObjectReference((PetscObject)sf)); 27 *vsf = sf; 28 PetscFunctionReturn(0); 29 } 30 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 31 PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote)); 32 PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl)); 33 maxl += 1; 34 if (ldl == PETSC_DECIDE) ldl = maxl; 35 if (ldr == PETSC_DECIDE) ldr = nr; 36 PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldr, nr); 37 PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldl, maxl); 38 vnr = nr * nv; 39 vnl = nl * nv; 40 PetscCall(PetscMalloc1(vnl, &viremote)); 41 if (ilocal) PetscCall(PetscMalloc1(vnl, &vilocal)); 42 43 /* TODO: Should this special SF be available, e.g. 44 PetscSFGetRanksSF or similar? */ 45 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 46 PetscCall(PetscMalloc1(nranks, &sranks)); 47 PetscCall(PetscArraycpy(sranks, ranks, nranks)); 48 PetscCall(PetscSortMPIInt(nranks, sranks)); 49 PetscCall(PetscMalloc1(nranks, &rremotes)); 50 for (i = 0; i < nranks; i++) { 51 rremotes[i].rank = sranks[i]; 52 rremotes[i].index = 0; 53 } 54 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &rankssf)); 55 PetscCall(PetscSFSetGraph(rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 56 PetscCall(PetscMalloc1(nranks, &ldrs)); 57 PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 58 PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 59 PetscCall(PetscSFDestroy(&rankssf)); 60 61 j = -1; 62 for (i = 0; i < nl; i++) { 63 const PetscInt r = iremote[i].rank; 64 const PetscInt ii = iremote[i].index; 65 66 if (j < 0 || sranks[j] != r) { PetscCall(PetscFindMPIInt(r, nranks, sranks, &j)); } 67 PetscCheck(j >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r); 68 for (v = 0; v < nv; v++) { 69 viremote[v * nl + i].rank = r; 70 viremote[v * nl + i].index = v * ldrs[j] + ii; 71 if (ilocal) vilocal[v * nl + i] = v * ldl + ilocal[i]; 72 } 73 } 74 PetscCall(PetscFree(sranks)); 75 PetscCall(PetscFree(ldrs)); 76 PetscCall(PetscSFCreate(comm, vsf)); 77 PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER)); 78 PetscFunctionReturn(0); 79 } 80 81 PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf) { 82 PetscSF asf; 83 84 PetscFunctionBegin; 85 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 86 PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2); 87 PetscValidPointer(osf, 3); 88 PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_vectorsf", (PetscObject *)&asf)); 89 if (!asf) { 90 PetscInt lda; 91 92 PetscCall(MatDenseGetLDA(A, &lda)); 93 PetscCall(PetscSFGetVectorSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf)); 94 PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_vectorsf", (PetscObject)asf)); 95 PetscCall(PetscObjectDereference((PetscObject)asf)); 96 } 97 *osf = asf; 98 PetscFunctionReturn(0); 99 } 100 101 #if defined(PETSC_HAVE_CUDA) 102 struct SignVector_Functor { 103 const PetscScalar *v; 104 PetscScalar *s; 105 SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { } 106 107 __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); } 108 }; 109 #endif 110 111 PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s) { 112 const PetscScalar *av; 113 PetscScalar *as; 114 PetscInt i, n; 115 #if defined(PETSC_HAVE_CUDA) 116 PetscBool viscuda, siscuda; 117 #endif 118 119 PetscFunctionBegin; 120 PetscValidHeaderSpecific(v, VEC_CLASSID, 1); 121 PetscValidHeaderSpecific(s, VEC_CLASSID, 2); 122 PetscCall(VecGetLocalSize(s, &n)); 123 PetscCall(VecGetLocalSize(v, &i)); 124 PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n); 125 #if defined(PETSC_HAVE_CUDA) 126 PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, "")); 127 PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, "")); 128 viscuda = (PetscBool)(viscuda && !v->boundtocpu); 129 siscuda = (PetscBool)(siscuda && !s->boundtocpu); 130 if (viscuda && siscuda) { 131 PetscCall(VecCUDAGetArrayRead(v, &av)); 132 PetscCall(VecCUDAGetArrayWrite(s, &as)); 133 SignVector_Functor sign_vector(av, as); 134 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector); 135 PetscCall(VecCUDARestoreArrayWrite(s, &as)); 136 PetscCall(VecCUDARestoreArrayRead(v, &av)); 137 } else 138 #endif 139 { 140 PetscCall(VecGetArrayRead(v, &av)); 141 PetscCall(VecGetArrayWrite(s, &as)); 142 for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.; 143 PetscCall(VecRestoreArrayWrite(s, &as)); 144 PetscCall(VecRestoreArrayRead(v, &av)); 145 } 146 PetscFunctionReturn(0); 147 } 148 149 #if defined(PETSC_HAVE_CUDA) 150 struct StandardBasis_Functor { 151 PetscScalar *v; 152 PetscInt j; 153 154 StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { } 155 __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); } 156 }; 157 #endif 158 159 PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i) { 160 #if defined(PETSC_HAVE_CUDA) 161 PetscBool iscuda; 162 #endif 163 PetscInt st, en; 164 165 PetscFunctionBegin; 166 PetscCall(VecGetOwnershipRange(x, &st, &en)); 167 #if defined(PETSC_HAVE_CUDA) 168 PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, "")); 169 iscuda = (PetscBool)(iscuda && !x->boundtocpu); 170 if (iscuda) { 171 PetscScalar *ax; 172 PetscCall(VecCUDAGetArrayWrite(x, &ax)); 173 StandardBasis_Functor delta(ax, i - st); 174 thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta); 175 PetscCall(VecCUDARestoreArrayWrite(x, &ax)); 176 } else 177 #endif 178 { 179 PetscCall(VecSet(x, 0.)); 180 if (st <= i && i < en) { PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES)); } 181 PetscCall(VecAssemblyBegin(x)); 182 PetscCall(VecAssemblyEnd(x)); 183 } 184 PetscFunctionReturn(0); 185 } 186 187 /* these are approximate norms */ 188 /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham 189 NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */ 190 PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n) { 191 Vec x, y, w, z; 192 PetscReal normz, adot; 193 PetscScalar dot; 194 PetscInt i, j, N, jold = -1; 195 PetscBool boundtocpu = PETSC_TRUE; 196 197 PetscFunctionBegin; 198 #if defined(PETSC_HAVE_DEVICE) 199 boundtocpu = A->boundtocpu; 200 #endif 201 switch (normtype) { 202 case NORM_INFINITY: 203 case NORM_1: 204 if (normsamples < 0) normsamples = 10; /* pure guess */ 205 if (normtype == NORM_INFINITY) { 206 Mat B; 207 PetscCall(MatCreateTranspose(A, &B)); 208 A = B; 209 } else { 210 PetscCall(PetscObjectReference((PetscObject)A)); 211 } 212 PetscCall(MatCreateVecs(A, &x, &y)); 213 PetscCall(MatCreateVecs(A, &z, &w)); 214 PetscCall(VecBindToCPU(x, boundtocpu)); 215 PetscCall(VecBindToCPU(y, boundtocpu)); 216 PetscCall(VecBindToCPU(z, boundtocpu)); 217 PetscCall(VecBindToCPU(w, boundtocpu)); 218 PetscCall(VecGetSize(x, &N)); 219 PetscCall(VecSet(x, 1. / N)); 220 *n = 0.0; 221 for (i = 0; i < normsamples; i++) { 222 PetscCall(MatMult(A, x, y)); 223 PetscCall(VecSign(y, w)); 224 PetscCall(MatMultTranspose(A, w, z)); 225 PetscCall(VecNorm(z, NORM_INFINITY, &normz)); 226 PetscCall(VecDot(x, z, &dot)); 227 adot = PetscAbsScalar(dot); 228 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot)); 229 if (normz <= adot && i > 0) { 230 PetscCall(VecNorm(y, NORM_1, n)); 231 break; 232 } 233 PetscCall(VecMax(z, &j, &normz)); 234 if (j == jold) { 235 PetscCall(VecNorm(y, NORM_1, n)); 236 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i)); 237 break; 238 } 239 jold = j; 240 PetscCall(VecSetDelta(x, j)); 241 } 242 PetscCall(MatDestroy(&A)); 243 PetscCall(VecDestroy(&x)); 244 PetscCall(VecDestroy(&w)); 245 PetscCall(VecDestroy(&y)); 246 PetscCall(VecDestroy(&z)); 247 break; 248 case NORM_2: 249 if (normsamples < 0) normsamples = 20; /* pure guess */ 250 PetscCall(MatCreateVecs(A, &x, &y)); 251 PetscCall(MatCreateVecs(A, &z, NULL)); 252 PetscCall(VecBindToCPU(x, boundtocpu)); 253 PetscCall(VecBindToCPU(y, boundtocpu)); 254 PetscCall(VecBindToCPU(z, boundtocpu)); 255 PetscCall(VecSetRandom(x, NULL)); 256 PetscCall(VecNormalize(x, NULL)); 257 *n = 0.0; 258 for (i = 0; i < normsamples; i++) { 259 PetscCall(MatMult(A, x, y)); 260 PetscCall(VecNormalize(y, n)); 261 PetscCall(MatMultTranspose(A, y, z)); 262 PetscCall(VecNorm(z, NORM_2, &normz)); 263 PetscCall(VecDot(x, z, &dot)); 264 adot = PetscAbsScalar(dot); 265 PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot)); 266 if (normz <= adot) break; 267 if (i < normsamples - 1) { 268 Vec t; 269 270 PetscCall(VecNormalize(z, NULL)); 271 t = x; 272 x = z; 273 z = t; 274 } 275 } 276 PetscCall(VecDestroy(&x)); 277 PetscCall(VecDestroy(&y)); 278 PetscCall(VecDestroy(&z)); 279 break; 280 default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]); 281 } 282 PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i)); 283 PetscFunctionReturn(0); 284 } 285