1 #include <petsc/private/matimpl.h> 2 #include <petsc/private/vecimpl.h> 3 #include <petscsf.h> 4 #if defined(PETSC_HAVE_CUDA) 5 #include <thrust/for_each.h> 6 #include <thrust/device_vector.h> 7 #include <thrust/execution_policy.h> 8 #endif 9 10 PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf) 11 { 12 PetscSF rankssf; 13 const PetscSFNode *iremote; 14 PetscSFNode *viremote,*rremotes; 15 const PetscInt *ilocal; 16 PetscInt *vilocal = NULL,*ldrs; 17 const PetscMPIInt *ranks; 18 PetscMPIInt *sranks; 19 PetscInt nranks,nr,nl,vnr,vnl,i,v,j,maxl; 20 MPI_Comm comm; 21 22 PetscFunctionBegin; 23 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 24 PetscValidLogicalCollectiveInt(sf,nv,2); 25 PetscValidPointer(vsf,5); 26 if (nv == 1) { 27 PetscCall(PetscObjectReference((PetscObject)sf)); 28 *vsf = sf; 29 PetscFunctionReturn(0); 30 } 31 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 32 PetscCall(PetscSFGetGraph(sf,&nr,&nl,&ilocal,&iremote)); 33 PetscCall(PetscSFGetLeafRange(sf,NULL,&maxl)); 34 maxl += 1; 35 if (ldl == PETSC_DECIDE) ldl = maxl; 36 if (ldr == PETSC_DECIDE) ldr = nr; 37 PetscCheck(ldr >= nr,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT,ldr,nr); 38 PetscCheck(ldl >= maxl,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT,ldl,maxl); 39 vnr = nr*nv; 40 vnl = nl*nv; 41 PetscCall(PetscMalloc1(vnl,&viremote)); 42 if (ilocal) PetscCall(PetscMalloc1(vnl,&vilocal)); 43 44 /* TODO: Should this special SF be available, e.g. 45 PetscSFGetRanksSF or similar? */ 46 PetscCall(PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL)); 47 PetscCall(PetscMalloc1(nranks,&sranks)); 48 PetscCall(PetscArraycpy(sranks,ranks,nranks)); 49 PetscCall(PetscSortMPIInt(nranks,sranks)); 50 PetscCall(PetscMalloc1(nranks,&rremotes)); 51 for (i=0;i<nranks;i++) { 52 rremotes[i].rank = sranks[i]; 53 rremotes[i].index = 0; 54 } 55 PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&rankssf)); 56 PetscCall(PetscSFSetGraph(rankssf,1,nranks,NULL,PETSC_OWN_POINTER,rremotes,PETSC_OWN_POINTER)); 57 PetscCall(PetscMalloc1(nranks,&ldrs)); 58 PetscCall(PetscSFBcastBegin(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE)); 59 PetscCall(PetscSFBcastEnd(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE)); 60 PetscCall(PetscSFDestroy(&rankssf)); 61 62 j = -1; 63 for (i=0;i<nl;i++) { 64 const PetscInt r = iremote[i].rank; 65 const PetscInt ii = iremote[i].index; 66 67 if (j < 0 || sranks[j] != r) { 68 PetscCall(PetscFindMPIInt(r,nranks,sranks,&j)); 69 } 70 PetscCheck(j >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unable to locate neighbor rank %" PetscInt_FMT,r); 71 for (v=0;v<nv;v++) { 72 viremote[v*nl + i].rank = r; 73 viremote[v*nl + i].index = v*ldrs[j] + ii; 74 if (ilocal) vilocal[v*nl + i] = v*ldl + ilocal[i]; 75 } 76 } 77 PetscCall(PetscFree(sranks)); 78 PetscCall(PetscFree(ldrs)); 79 PetscCall(PetscSFCreate(comm,vsf)); 80 PetscCall(PetscSFSetGraph(*vsf,vnr,vnl,vilocal,PETSC_OWN_POINTER,viremote,PETSC_OWN_POINTER)); 81 PetscFunctionReturn(0); 82 } 83 84 PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf) 85 { 86 PetscSF asf; 87 88 PetscFunctionBegin; 89 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 90 PetscValidHeaderSpecific(h2sf,PETSCSF_CLASSID,2); 91 PetscValidPointer(osf,3); 92 PetscCall(PetscObjectQuery((PetscObject)A,"_math2opus_vectorsf",(PetscObject*)&asf)); 93 if (!asf) { 94 PetscInt lda; 95 96 PetscCall(MatDenseGetLDA(A,&lda)); 97 PetscCall(PetscSFGetVectorSF(h2sf,A->cmap->N,lda,PETSC_DECIDE,&asf)); 98 PetscCall(PetscObjectCompose((PetscObject)A,"_math2opus_vectorsf",(PetscObject)asf)); 99 PetscCall(PetscObjectDereference((PetscObject)asf)); 100 } 101 *osf = asf; 102 PetscFunctionReturn(0); 103 } 104 105 #if defined(PETSC_HAVE_CUDA) 106 struct SignVector_Functor 107 { 108 const PetscScalar *v; 109 PetscScalar *s; 110 SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) {} 111 112 __host__ __device__ void operator()(PetscInt i) 113 { 114 s[i] = (v[i] < 0 ? -1 : 1); 115 } 116 }; 117 #endif 118 119 PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s) 120 { 121 const PetscScalar *av; 122 PetscScalar *as; 123 PetscInt i,n; 124 #if defined(PETSC_HAVE_CUDA) 125 PetscBool viscuda,siscuda; 126 #endif 127 128 PetscFunctionBegin; 129 PetscValidHeaderSpecific(v,VEC_CLASSID,1); 130 PetscValidHeaderSpecific(s,VEC_CLASSID,2); 131 PetscCall(VecGetLocalSize(s,&n)); 132 PetscCall(VecGetLocalSize(v,&i)); 133 PetscCheck(i == n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT,i,n); 134 #if defined(PETSC_HAVE_CUDA) 135 PetscCall(PetscObjectTypeCompareAny((PetscObject)v,&viscuda,VECSEQCUDA,VECMPICUDA,"")); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)s,&siscuda,VECSEQCUDA,VECMPICUDA,"")); 137 viscuda = (PetscBool)(viscuda && !v->boundtocpu); 138 siscuda = (PetscBool)(siscuda && !s->boundtocpu); 139 if (viscuda && siscuda) { 140 PetscCall(VecCUDAGetArrayRead(v,&av)); 141 PetscCall(VecCUDAGetArrayWrite(s,&as)); 142 SignVector_Functor sign_vector(av, as); 143 thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0), 144 thrust::counting_iterator<PetscInt>(n), sign_vector); 145 PetscCall(VecCUDARestoreArrayWrite(s,&as)); 146 PetscCall(VecCUDARestoreArrayRead(v,&av)); 147 } else 148 #endif 149 { 150 PetscCall(VecGetArrayRead(v,&av)); 151 PetscCall(VecGetArrayWrite(s,&as)); 152 for (i=0;i<n;i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.; 153 PetscCall(VecRestoreArrayWrite(s,&as)); 154 PetscCall(VecRestoreArrayRead(v,&av)); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #if defined(PETSC_HAVE_CUDA) 160 struct StandardBasis_Functor 161 { 162 PetscScalar *v; 163 PetscInt j; 164 165 StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) {} 166 __host__ __device__ void operator()(PetscInt i) 167 { 168 v[i] = (i == j ? 1 : 0); 169 } 170 }; 171 #endif 172 173 PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i) 174 { 175 #if defined(PETSC_HAVE_CUDA) 176 PetscBool iscuda; 177 #endif 178 PetscInt st,en; 179 180 PetscFunctionBegin; 181 PetscCall(VecGetOwnershipRange(x,&st,&en)); 182 #if defined(PETSC_HAVE_CUDA) 183 PetscCall(PetscObjectTypeCompareAny((PetscObject)x,&iscuda,VECSEQCUDA,VECMPICUDA,"")); 184 iscuda = (PetscBool)(iscuda && !x->boundtocpu); 185 if (iscuda) { 186 PetscScalar *ax; 187 PetscCall(VecCUDAGetArrayWrite(x,&ax)); 188 StandardBasis_Functor delta(ax, i-st); 189 thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0), 190 thrust::counting_iterator<PetscInt>(en-st), delta); 191 PetscCall(VecCUDARestoreArrayWrite(x,&ax)); 192 } else 193 #endif 194 { 195 PetscCall(VecSet(x,0.)); 196 if (st <= i && i < en) { 197 PetscCall(VecSetValue(x,i,1.0,INSERT_VALUES)); 198 } 199 PetscCall(VecAssemblyBegin(x)); 200 PetscCall(VecAssemblyEnd(x)); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 /* these are approximate norms */ 206 /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham 207 NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */ 208 PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal* n) 209 { 210 Vec x,y,w,z; 211 PetscReal normz,adot; 212 PetscScalar dot; 213 PetscInt i,j,N,jold = -1; 214 PetscBool boundtocpu = PETSC_TRUE; 215 216 PetscFunctionBegin; 217 #if defined(PETSC_HAVE_DEVICE) 218 boundtocpu = A->boundtocpu; 219 #endif 220 switch (normtype) { 221 case NORM_INFINITY: 222 case NORM_1: 223 if (normsamples < 0) normsamples = 10; /* pure guess */ 224 if (normtype == NORM_INFINITY) { 225 Mat B; 226 PetscCall(MatCreateTranspose(A,&B)); 227 A = B; 228 } else { 229 PetscCall(PetscObjectReference((PetscObject)A)); 230 } 231 PetscCall(MatCreateVecs(A,&x,&y)); 232 PetscCall(MatCreateVecs(A,&z,&w)); 233 PetscCall(VecBindToCPU(x,boundtocpu)); 234 PetscCall(VecBindToCPU(y,boundtocpu)); 235 PetscCall(VecBindToCPU(z,boundtocpu)); 236 PetscCall(VecBindToCPU(w,boundtocpu)); 237 PetscCall(VecGetSize(x,&N)); 238 PetscCall(VecSet(x,1./N)); 239 *n = 0.0; 240 for (i = 0; i < normsamples; i++) { 241 PetscCall(MatMult(A,x,y)); 242 PetscCall(VecSign(y,w)); 243 PetscCall(MatMultTranspose(A,w,z)); 244 PetscCall(VecNorm(z,NORM_INFINITY,&normz)); 245 PetscCall(VecDot(x,z,&dot)); 246 adot = PetscAbsScalar(dot); 247 PetscCall(PetscInfo(A,"%s norm it %" PetscInt_FMT " -> (%g %g)\n",NormTypes[normtype],i,(double)normz,(double)adot)); 248 if (normz <= adot && i > 0) { 249 PetscCall(VecNorm(y,NORM_1,n)); 250 break; 251 } 252 PetscCall(VecMax(z,&j,&normz)); 253 if (j == jold) { 254 PetscCall(VecNorm(y,NORM_1,n)); 255 PetscCall(PetscInfo(A,"%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n",NormTypes[normtype],i)); 256 break; 257 } 258 jold = j; 259 PetscCall(VecSetDelta(x,j)); 260 } 261 PetscCall(MatDestroy(&A)); 262 PetscCall(VecDestroy(&x)); 263 PetscCall(VecDestroy(&w)); 264 PetscCall(VecDestroy(&y)); 265 PetscCall(VecDestroy(&z)); 266 break; 267 case NORM_2: 268 if (normsamples < 0) normsamples = 20; /* pure guess */ 269 PetscCall(MatCreateVecs(A,&x,&y)); 270 PetscCall(MatCreateVecs(A,&z,NULL)); 271 PetscCall(VecBindToCPU(x,boundtocpu)); 272 PetscCall(VecBindToCPU(y,boundtocpu)); 273 PetscCall(VecBindToCPU(z,boundtocpu)); 274 PetscCall(VecSetRandom(x,NULL)); 275 PetscCall(VecNormalize(x,NULL)); 276 *n = 0.0; 277 for (i = 0; i < normsamples; i++) { 278 PetscCall(MatMult(A,x,y)); 279 PetscCall(VecNormalize(y,n)); 280 PetscCall(MatMultTranspose(A,y,z)); 281 PetscCall(VecNorm(z,NORM_2,&normz)); 282 PetscCall(VecDot(x,z,&dot)); 283 adot = PetscAbsScalar(dot); 284 PetscCall(PetscInfo(A,"%s norm it %" PetscInt_FMT " -> %g (%g %g)\n",NormTypes[normtype],i,(double)*n,(double)normz,(double)adot)); 285 if (normz <= adot) break; 286 if (i < normsamples - 1) { 287 Vec t; 288 289 PetscCall(VecNormalize(z,NULL)); 290 t = x; 291 x = z; 292 z = t; 293 } 294 } 295 PetscCall(VecDestroy(&x)); 296 PetscCall(VecDestroy(&y)); 297 PetscCall(VecDestroy(&z)); 298 break; 299 default: 300 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"%s norm not supported",NormTypes[normtype]); 301 } 302 PetscCall(PetscInfo(A,"%s norm %g computed in %" PetscInt_FMT " iterations\n",NormTypes[normtype],(double)*n,i)); 303 PetscFunctionReturn(0); 304 } 305