1 2 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 3 4 typedef struct _n_PetscSFBasicPack *PetscSFBasicPack; 5 struct _n_PetscSFBasicPack { 6 void (*Pack)(PetscInt,PetscInt,const PetscInt*,const void*,void*); 7 void (*UnpackInsert)(PetscInt,PetscInt,const PetscInt*,void*,const void*); 8 void (*UnpackAdd)(PetscInt,PetscInt,const PetscInt*,void*,const void*); 9 void (*UnpackMin)(PetscInt,PetscInt,const PetscInt*,void*,const void*); 10 void (*UnpackMax)(PetscInt,PetscInt,const PetscInt*,void*,const void*); 11 void (*UnpackMinloc)(PetscInt,PetscInt,const PetscInt*,void*,const void*); 12 void (*UnpackMaxloc)(PetscInt,PetscInt,const PetscInt*,void*,const void*); 13 void (*UnpackMult)(PetscInt,PetscInt,const PetscInt*,void*,const void *); 14 void (*UnpackLAND)(PetscInt,PetscInt,const PetscInt*,void*,const void *); 15 void (*UnpackBAND)(PetscInt,PetscInt,const PetscInt*,void*,const void *); 16 void (*UnpackLOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *); 17 void (*UnpackBOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *); 18 void (*UnpackLXOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *); 19 void (*UnpackBXOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *); 20 void (*FetchAndInsert)(PetscInt,PetscInt,const PetscInt*,void*,void*); 21 void (*FetchAndAdd)(PetscInt,PetscInt,const PetscInt*,void*,void*); 22 void (*FetchAndMin)(PetscInt,PetscInt,const PetscInt*,void*,void*); 23 void (*FetchAndMax)(PetscInt,PetscInt,const PetscInt*,void*,void*); 24 void (*FetchAndMinloc)(PetscInt,PetscInt,const PetscInt*,void*,void*); 25 void (*FetchAndMaxloc)(PetscInt,PetscInt,const PetscInt*,void*,void*); 26 void (*FetchAndMult)(PetscInt,PetscInt,const PetscInt*,void*,void*); 27 void (*FetchAndLAND)(PetscInt,PetscInt,const PetscInt*,void*,void*); 28 void (*FetchAndBAND)(PetscInt,PetscInt,const PetscInt*,void*,void*); 29 void (*FetchAndLOR)(PetscInt,PetscInt,const PetscInt*,void*,void*); 30 void (*FetchAndBOR)(PetscInt,PetscInt,const PetscInt*,void*,void*); 31 void (*FetchAndLXOR)(PetscInt,PetscInt,const PetscInt*,void*,void*); 32 void (*FetchAndBXOR)(PetscInt,PetscInt,const PetscInt*,void*,void*); 33 34 MPI_Datatype unit; 35 size_t unitbytes; /* Number of bytes in a unit */ 36 PetscInt bs; /* Number of basic units in a unit */ 37 const void *key; /* Array used as key for operation */ 38 char **root; /* Packed root data, indexed by leaf rank */ 39 char **leaf; /* Packed leaf data, indexed by root rank */ 40 MPI_Request *requests; /* Array of root requests followed by leaf requests */ 41 PetscSFBasicPack next; 42 }; 43 44 typedef struct { 45 PetscMPIInt tag; 46 PetscMPIInt niranks; /* Number of incoming ranks (ranks accessing my roots) */ 47 PetscMPIInt ndiranks; /* Number of incoming ranks (ranks accessing my roots) in distinguished set */ 48 PetscMPIInt *iranks; /* Array of ranks that reference my roots */ 49 PetscInt itotal; /* Total number of graph edges referencing my roots */ 50 PetscInt *ioffset; /* Array of length niranks+1 holding offset in irootloc[] for each rank */ 51 PetscInt *irootloc; /* Incoming roots referenced by ranks starting at ioffset[rank] */ 52 PetscSFBasicPack avail; /* One or more entries per MPI Datatype, lazily constructed */ 53 PetscSFBasicPack inuse; /* Buffers being used for transactions that have not yet completed */ 54 } PetscSF_Basic; 55 56 #if !defined(PETSC_HAVE_MPI_TYPE_DUP) /* Danger: type is not reference counted; subject to ABA problem */ 57 PETSC_STATIC_INLINE PetscErrorCode MPI_Type_dup(MPI_Datatype datatype,MPI_Datatype *newtype) 58 { 59 *newtype = datatype; 60 return 0; 61 } 62 #endif 63 64 /* 65 * MPI_Reduce_local is not really useful because it can't handle sparse data and it vectorizes "in the wrong direction", 66 * therefore we pack data types manually. This section defines packing routines for the standard data types. 67 */ 68 69 #define CPPJoin2_exp(a,b) a ## b 70 #define CPPJoin2(a,b) CPPJoin2_exp(a,b) 71 #define CPPJoin3_exp_(a,b,c) a ## b ## _ ## c 72 #define CPPJoin3_(a,b,c) CPPJoin3_exp_(a,b,c) 73 74 /* Basic types without addition */ 75 #define DEF_PackNoInit(type,BS) \ 76 static void CPPJoin3_(Pack_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,const void *unpacked,void *packed) { \ 77 const type *u = (const type*)unpacked; \ 78 type *p = (type*)packed; \ 79 PetscInt i,j,k; \ 80 for (i=0; i<n; i++) \ 81 for (j=0; j<bs; j+=BS) \ 82 for (k=j; k<j+BS; k++) \ 83 p[i*bs+k] = u[idx[i]*bs+k]; \ 84 } \ 85 static void CPPJoin3_(UnpackInsert_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 86 type *u = (type*)unpacked; \ 87 const type *p = (const type*)packed; \ 88 PetscInt i,j,k; \ 89 for (i=0; i<n; i++) \ 90 for (j=0; j<bs; j+=BS) \ 91 for (k=j; k<j+BS; k++) \ 92 u[idx[i]*bs+k] = p[i*bs+k]; \ 93 } \ 94 static void CPPJoin3_(FetchAndInsert_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 95 type *u = (type*)unpacked; \ 96 type *p = (type*)packed; \ 97 PetscInt i,j,k; \ 98 for (i=0; i<n; i++) { \ 99 PetscInt ii = idx[i]; \ 100 for (j=0; j<bs; j+=BS) \ 101 for (k=j; k<j+BS; k++) { \ 102 type t = u[ii*bs+k]; \ 103 u[ii*bs+k] = p[i*bs+k]; \ 104 p[i*bs+k] = t; \ 105 } \ 106 } \ 107 } 108 109 /* Basic types defining addition */ 110 #define DEF_PackAddNoInit(type,BS) \ 111 DEF_PackNoInit(type,BS) \ 112 static void CPPJoin3_(UnpackAdd_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 113 type *u = (type*)unpacked; \ 114 const type *p = (const type*)packed; \ 115 PetscInt i,j,k; \ 116 for (i=0; i<n; i++) \ 117 for (j=0; j<bs; j+=BS) \ 118 for (k=j; k<j+BS; k++) \ 119 u[idx[i]*bs+k] += p[i*bs+k]; \ 120 } \ 121 static void CPPJoin3_(FetchAndAdd_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 122 type *u = (type*)unpacked; \ 123 type *p = (type*)packed; \ 124 PetscInt i,j,k; \ 125 for (i=0; i<n; i++) { \ 126 PetscInt ii = idx[i]; \ 127 for (j=0; j<bs; j+=BS) \ 128 for (k=j; k<j+BS; k++) { \ 129 type t = u[ii*bs+k]; \ 130 u[ii*bs+k] = t + p[i*bs+k]; \ 131 p[i*bs+k] = t; \ 132 } \ 133 } \ 134 } \ 135 static void CPPJoin3_(UnpackMult_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 136 type *u = (type*)unpacked; \ 137 const type *p = (const type*)packed; \ 138 PetscInt i,j,k; \ 139 for (i=0; i<n; i++) \ 140 for (j=0; j<bs; j+=BS) \ 141 for (k=j; k<j+BS; k++) \ 142 u[idx[i]*bs+k] *= p[i*bs+k]; \ 143 } \ 144 static void CPPJoin3_(FetchAndMult_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 145 type *u = (type*)unpacked; \ 146 type *p = (type*)packed; \ 147 PetscInt i,j,k; \ 148 for (i=0; i<n; i++) { \ 149 PetscInt ii = idx[i]; \ 150 for (j=0; j<bs; j+=BS) \ 151 for (k=j; k<j+BS; k++) { \ 152 type t = u[ii*bs+k]; \ 153 u[ii*bs+k] = t * p[i*bs+k]; \ 154 p[i*bs+k] = t; \ 155 } \ 156 } \ 157 } 158 #define DEF_Pack(type,BS) \ 159 DEF_PackAddNoInit(type,BS) \ 160 static void CPPJoin3_(PackInit_,type,BS)(PetscSFBasicPack link) { \ 161 link->Pack = CPPJoin3_(Pack_,type,BS); \ 162 link->UnpackInsert = CPPJoin3_(UnpackInsert_,type,BS); \ 163 link->UnpackAdd = CPPJoin3_(UnpackAdd_,type,BS); \ 164 link->UnpackMult = CPPJoin3_(UnpackMult_,type,BS); \ 165 link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,type,BS); \ 166 link->FetchAndAdd = CPPJoin3_(FetchAndAdd_,type,BS); \ 167 link->FetchAndMult = CPPJoin3_(FetchAndMult_,type,BS); \ 168 link->unitbytes = sizeof(type); \ 169 } 170 /* Comparable types */ 171 #define DEF_PackCmp(type) \ 172 DEF_PackAddNoInit(type,1) \ 173 static void CPPJoin2(UnpackMax_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 174 type *u = (type*)unpacked; \ 175 const type *p = (const type*)packed; \ 176 PetscInt i; \ 177 for (i=0; i<n; i++) { \ 178 type v = u[idx[i]]; \ 179 u[idx[i]] = PetscMax(v,p[i]); \ 180 } \ 181 } \ 182 static void CPPJoin2(UnpackMin_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 183 type *u = (type*)unpacked; \ 184 const type *p = (const type*)packed; \ 185 PetscInt i; \ 186 for (i=0; i<n; i++) { \ 187 type v = u[idx[i]]; \ 188 u[idx[i]] = PetscMin(v,p[i]); \ 189 } \ 190 } \ 191 static void CPPJoin2(FetchAndMax_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 192 type *u = (type*)unpacked; \ 193 type *p = (type*)packed; \ 194 PetscInt i; \ 195 for (i=0; i<n; i++) { \ 196 PetscInt j = idx[i]; \ 197 type v = u[j]; \ 198 u[j] = PetscMax(v,p[i]); \ 199 p[i] = v; \ 200 } \ 201 } \ 202 static void CPPJoin2(FetchAndMin_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 203 type *u = (type*)unpacked; \ 204 type *p = (type*)packed; \ 205 PetscInt i; \ 206 for (i=0; i<n; i++) { \ 207 PetscInt j = idx[i]; \ 208 type v = u[j]; \ 209 u[j] = PetscMin(v,p[i]); \ 210 p[i] = v; \ 211 } \ 212 } \ 213 static void CPPJoin2(PackInit_,type)(PetscSFBasicPack link) { \ 214 link->Pack = CPPJoin3_(Pack_,type,1); \ 215 link->UnpackInsert = CPPJoin3_(UnpackInsert_,type,1); \ 216 link->UnpackAdd = CPPJoin3_(UnpackAdd_,type,1); \ 217 link->UnpackMax = CPPJoin2(UnpackMax_,type); \ 218 link->UnpackMin = CPPJoin2(UnpackMin_,type); \ 219 link->UnpackMult = CPPJoin3_(UnpackMult_,type,1); \ 220 link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,type,1); \ 221 link->FetchAndAdd = CPPJoin3_(FetchAndAdd_ ,type,1); \ 222 link->FetchAndMax = CPPJoin2(FetchAndMax_ ,type); \ 223 link->FetchAndMin = CPPJoin2(FetchAndMin_ ,type); \ 224 link->FetchAndMult = CPPJoin3_(FetchAndMult_,type,1); \ 225 link->unitbytes = sizeof(type); \ 226 } 227 228 /* Logical Types */ 229 #define DEF_PackLog(type) \ 230 static void CPPJoin2(UnpackLAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 231 type *u = (type*)unpacked; \ 232 const type *p = (const type*)packed; \ 233 PetscInt i; \ 234 for (i=0; i<n; i++) { \ 235 type v = u[idx[i]]; \ 236 u[idx[i]] = v && p[i]; \ 237 } \ 238 } \ 239 static void CPPJoin2(UnpackLOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 240 type *u = (type*)unpacked; \ 241 const type *p = (const type*)packed; \ 242 PetscInt i; \ 243 for (i=0; i<n; i++) { \ 244 type v = u[idx[i]]; \ 245 u[idx[i]] = v || p[i]; \ 246 } \ 247 } \ 248 static void CPPJoin2(UnpackLXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 249 type *u = (type*)unpacked; \ 250 const type *p = (const type*)packed; \ 251 PetscInt i; \ 252 for (i=0; i<n; i++) { \ 253 type v = u[idx[i]]; \ 254 u[idx[i]] = (!v)!=(!p[i]); \ 255 } \ 256 } \ 257 static void CPPJoin2(FetchAndLAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 258 type *u = (type*)unpacked; \ 259 type *p = (type*)packed; \ 260 PetscInt i; \ 261 for (i=0; i<n; i++) { \ 262 PetscInt j = idx[i]; \ 263 type v = u[j]; \ 264 u[j] = v && p[i]; \ 265 p[i] = v; \ 266 } \ 267 } \ 268 static void CPPJoin2(FetchAndLOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 269 type *u = (type*)unpacked; \ 270 type *p = (type*)packed; \ 271 PetscInt i; \ 272 for (i=0; i<n; i++) { \ 273 PetscInt j = idx[i]; \ 274 type v = u[j]; \ 275 u[j] = v || p[i]; \ 276 p[i] = v; \ 277 } \ 278 } \ 279 static void CPPJoin2(FetchAndLXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 280 type *u = (type*)unpacked; \ 281 type *p = (type*)packed; \ 282 PetscInt i; \ 283 for (i=0; i<n; i++) { \ 284 PetscInt j = idx[i]; \ 285 type v = u[j]; \ 286 u[j] = (!v)!=(!p[i]); \ 287 p[i] = v; \ 288 } \ 289 } \ 290 static void CPPJoin2(PackInit_Logical_,type)(PetscSFBasicPack link) { \ 291 link->UnpackLAND = CPPJoin2(UnpackLAND_,type); \ 292 link->UnpackLOR = CPPJoin2(UnpackLOR_,type); \ 293 link->UnpackLXOR = CPPJoin2(UnpackLXOR_,type); \ 294 link->FetchAndLAND = CPPJoin2(FetchAndLAND_,type); \ 295 link->FetchAndLOR = CPPJoin2(FetchAndLOR_,type); \ 296 link->FetchAndLXOR = CPPJoin2(FetchAndLXOR_,type); \ 297 } 298 299 300 /* Bitwise Types */ 301 #define DEF_PackBit(type) \ 302 static void CPPJoin2(UnpackBAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 303 type *u = (type*)unpacked; \ 304 const type *p = (const type*)packed; \ 305 PetscInt i; \ 306 for (i=0; i<n; i++) { \ 307 type v = u[idx[i]]; \ 308 u[idx[i]] = v & p[i]; \ 309 } \ 310 } \ 311 static void CPPJoin2(UnpackBOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 312 type *u = (type*)unpacked; \ 313 const type *p = (const type*)packed; \ 314 PetscInt i; \ 315 for (i=0; i<n; i++) { \ 316 type v = u[idx[i]]; \ 317 u[idx[i]] = v | p[i]; \ 318 } \ 319 } \ 320 static void CPPJoin2(UnpackBXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 321 type *u = (type*)unpacked; \ 322 const type *p = (const type*)packed; \ 323 PetscInt i; \ 324 for (i=0; i<n; i++) { \ 325 type v = u[idx[i]]; \ 326 u[idx[i]] = v^p[i]; \ 327 } \ 328 } \ 329 static void CPPJoin2(FetchAndBAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 330 type *u = (type*)unpacked; \ 331 type *p = (type*)packed; \ 332 PetscInt i; \ 333 for (i=0; i<n; i++) { \ 334 PetscInt j = idx[i]; \ 335 type v = u[j]; \ 336 u[j] = v & p[i]; \ 337 p[i] = v; \ 338 } \ 339 } \ 340 static void CPPJoin2(FetchAndBOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 341 type *u = (type*)unpacked; \ 342 type *p = (type*)packed; \ 343 PetscInt i; \ 344 for (i=0; i<n; i++) { \ 345 PetscInt j = idx[i]; \ 346 type v = u[j]; \ 347 u[j] = v | p[i]; \ 348 p[i] = v; \ 349 } \ 350 } \ 351 static void CPPJoin2(FetchAndBXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 352 type *u = (type*)unpacked; \ 353 type *p = (type*)packed; \ 354 PetscInt i; \ 355 for (i=0; i<n; i++) { \ 356 PetscInt j = idx[i]; \ 357 type v = u[j]; \ 358 u[j] = v^p[i]; \ 359 p[i] = v; \ 360 } \ 361 } \ 362 static void CPPJoin2(PackInit_Bitwise_,type)(PetscSFBasicPack link) { \ 363 link->UnpackBAND = CPPJoin2(UnpackBAND_,type); \ 364 link->UnpackBOR = CPPJoin2(UnpackBOR_,type); \ 365 link->UnpackBXOR = CPPJoin2(UnpackBXOR_,type); \ 366 link->FetchAndBAND = CPPJoin2(FetchAndBAND_,type); \ 367 link->FetchAndBOR = CPPJoin2(FetchAndBOR_,type); \ 368 link->FetchAndBXOR = CPPJoin2(FetchAndBXOR_,type); \ 369 } 370 371 /* Pair types */ 372 #define CPPJoinloc_exp(base,op,t1,t2) base ## op ## loc_ ## t1 ## _ ## t2 373 #define CPPJoinloc(base,op,t1,t2) CPPJoinloc_exp(base,op,t1,t2) 374 #define PairType(type1,type2) CPPJoin3_(_pairtype_,type1,type2) 375 #define DEF_UnpackXloc(type1,type2,locname,op) \ 376 static void CPPJoinloc(Unpack,locname,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 377 PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \ 378 const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \ 379 PetscInt i; \ 380 for (i=0; i<n; i++) { \ 381 PetscInt j = idx[i]; \ 382 if (p[i].a op u[j].a) { \ 383 u[j].a = p[i].a; \ 384 u[j].b = p[i].b; \ 385 } else if (u[j].a == p[i].a) { \ 386 u[j].b = PetscMin(u[j].b,p[i].b); \ 387 } \ 388 } \ 389 } \ 390 static void CPPJoinloc(FetchAnd,locname,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 391 PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \ 392 PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \ 393 PetscInt i; \ 394 for (i=0; i<n; i++) { \ 395 PetscInt j = idx[i]; \ 396 PairType(type1,type2) v; \ 397 v.a = u[j].a; \ 398 v.b = u[j].b; \ 399 if (p[i].a op u[j].a) { \ 400 u[j].a = p[i].a; \ 401 u[j].b = p[i].b; \ 402 } else if (u[j].a == p[i].a) { \ 403 u[j].b = PetscMin(u[j].b,p[i].b); \ 404 } \ 405 p[i].a = v.a; \ 406 p[i].b = v.b; \ 407 } \ 408 } 409 #define DEF_PackPair(type1,type2) \ 410 typedef struct {type1 a; type2 b;} PairType(type1,type2); \ 411 static void CPPJoin3_(Pack_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,const void *unpacked,void *packed) { \ 412 const PairType(type1,type2) *u = (const PairType(type1,type2)*)unpacked; \ 413 PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \ 414 PetscInt i; \ 415 for (i=0; i<n; i++) { \ 416 p[i].a = u[idx[i]].a; \ 417 p[i].b = u[idx[i]].b; \ 418 } \ 419 } \ 420 static void CPPJoin3_(UnpackInsert_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 421 PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \ 422 const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \ 423 PetscInt i; \ 424 for (i=0; i<n; i++) { \ 425 u[idx[i]].a = p[i].a; \ 426 u[idx[i]].b = p[i].b; \ 427 } \ 428 } \ 429 static void CPPJoin3_(UnpackAdd_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \ 430 PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \ 431 const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \ 432 PetscInt i; \ 433 for (i=0; i<n; i++) { \ 434 u[idx[i]].a += p[i].a; \ 435 u[idx[i]].b += p[i].b; \ 436 } \ 437 } \ 438 static void CPPJoin3_(FetchAndInsert_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 439 PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \ 440 PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \ 441 PetscInt i; \ 442 for (i=0; i<n; i++) { \ 443 PetscInt j = idx[i]; \ 444 PairType(type1,type2) v; \ 445 v.a = u[j].a; \ 446 v.b = u[j].b; \ 447 u[j].a = p[i].a; \ 448 u[j].b = p[i].b; \ 449 p[i].a = v.a; \ 450 p[i].b = v.b; \ 451 } \ 452 } \ 453 static void FetchAndAdd_ ## type1 ## _ ## type2(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \ 454 PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \ 455 PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \ 456 PetscInt i; \ 457 for (i=0; i<n; i++) { \ 458 PetscInt j = idx[i]; \ 459 PairType(type1,type2) v; \ 460 v.a = u[j].a; \ 461 v.b = u[j].b; \ 462 u[j].a = v.a + p[i].a; \ 463 u[j].b = v.b + p[i].b; \ 464 p[i].a = v.a; \ 465 p[i].b = v.b; \ 466 } \ 467 } \ 468 DEF_UnpackXloc(type1,type2,Max,>) \ 469 DEF_UnpackXloc(type1,type2,Min,<) \ 470 static void CPPJoin3_(PackInit_,type1,type2)(PetscSFBasicPack link) { \ 471 link->Pack = CPPJoin3_(Pack_,type1,type2); \ 472 link->UnpackInsert = CPPJoin3_(UnpackInsert_,type1,type2); \ 473 link->UnpackAdd = CPPJoin3_(UnpackAdd_,type1,type2); \ 474 link->UnpackMaxloc = CPPJoin3_(UnpackMaxloc_,type1,type2); \ 475 link->UnpackMinloc = CPPJoin3_(UnpackMinloc_,type1,type2); \ 476 link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,type1,type2); \ 477 link->FetchAndAdd = CPPJoin3_(FetchAndAdd_,type1,type2); \ 478 link->FetchAndMaxloc = CPPJoin3_(FetchAndMaxloc_,type1,type2); \ 479 link->FetchAndMinloc = CPPJoin3_(FetchAndMinloc_,type1,type2); \ 480 link->unitbytes = sizeof(PairType(type1,type2)); \ 481 } 482 483 /* Currently only dumb blocks of data */ 484 #define BlockType(unit,count) CPPJoin3_(_blocktype_,unit,count) 485 #define DEF_Block(unit,count) \ 486 typedef struct {unit v[count];} BlockType(unit,count); \ 487 DEF_PackNoInit(BlockType(unit,count),1) \ 488 static void CPPJoin3_(PackInit_block_,unit,count)(PetscSFBasicPack link) { \ 489 link->Pack = CPPJoin3_(Pack_,BlockType(unit,count),1); \ 490 link->UnpackInsert = CPPJoin3_(UnpackInsert_,BlockType(unit,count),1); \ 491 link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,BlockType(unit,count),1); \ 492 link->unitbytes = sizeof(BlockType(unit,count)); \ 493 } 494 495 DEF_PackCmp(int) 496 DEF_PackBit(int) 497 DEF_PackLog(int) 498 DEF_PackCmp(PetscInt) 499 DEF_PackBit(PetscInt) 500 DEF_PackLog(PetscInt) 501 DEF_Pack(PetscInt,2) 502 DEF_Pack(PetscInt,3) 503 DEF_Pack(PetscInt,4) 504 DEF_Pack(PetscInt,5) 505 DEF_Pack(PetscInt,7) 506 DEF_PackCmp(PetscReal) 507 DEF_PackLog(PetscReal) 508 DEF_Pack(PetscReal,2) 509 DEF_Pack(PetscReal,3) 510 DEF_Pack(PetscReal,4) 511 DEF_Pack(PetscReal,5) 512 DEF_Pack(PetscReal,7) 513 #if defined(PETSC_HAVE_COMPLEX) 514 DEF_Pack(PetscComplex,1) 515 DEF_Pack(PetscComplex,2) 516 DEF_Pack(PetscComplex,3) 517 DEF_Pack(PetscComplex,4) 518 DEF_Pack(PetscComplex,5) 519 DEF_Pack(PetscComplex,7) 520 #endif 521 DEF_PackPair(int,int) 522 DEF_PackPair(PetscInt,PetscInt) 523 DEF_Block(int,1) 524 DEF_Block(int,2) 525 DEF_Block(int,3) 526 DEF_Block(int,4) 527 DEF_Block(int,5) 528 DEF_Block(int,6) 529 DEF_Block(int,7) 530 DEF_Block(int,8) 531 532 static PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 533 { 534 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 535 PetscErrorCode ierr; 536 PetscInt *rlengths,*ilengths,i; 537 PetscMPIInt rank,niranks,*iranks; 538 MPI_Comm comm; 539 MPI_Group group; 540 MPI_Request *rootreqs,*leafreqs; 541 542 PetscFunctionBegin; 543 ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 544 ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 545 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 546 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 547 ierr = PetscObjectGetNewTag((PetscObject)sf,&bas->tag);CHKERRQ(ierr); 548 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 549 /* 550 * Inform roots about how many leaves and from which ranks 551 */ 552 ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 553 /* Determine number, sending ranks, and length of incoming */ 554 for (i=0; i<sf->nranks; i++) { 555 rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 556 } 557 ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 558 559 /* Partition into distinguished and non-distinguished incoming ranks */ 560 bas->ndiranks = sf->ndranks; 561 bas->niranks = bas->ndiranks + niranks; 562 ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 563 bas->ioffset[0] = 0; 564 for (i=0; i<bas->ndiranks; i++) { 565 bas->iranks[i] = sf->ranks[i]; 566 bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 567 } 568 if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 569 for ( ; i<bas->niranks; i++) { 570 bas->iranks[i] = iranks[i-bas->ndiranks]; 571 bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 572 } 573 bas->itotal = bas->ioffset[i]; 574 ierr = PetscFree(rlengths);CHKERRQ(ierr); 575 ierr = PetscFree(iranks);CHKERRQ(ierr); 576 ierr = PetscFree(ilengths);CHKERRQ(ierr); 577 578 /* Send leaf identities to roots */ 579 ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 580 ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 581 for (i=bas->ndiranks; i<bas->niranks; i++) { 582 ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],bas->tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr); 583 } 584 for (i=0; i<sf->nranks; i++) { 585 PetscMPIInt npoints; 586 ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 587 if (i < sf->ndranks) { 588 if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 589 if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 590 if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 591 ierr = PetscMemcpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints*sizeof(bas->irootloc[0]));CHKERRQ(ierr); 592 continue; 593 } 594 ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],bas->tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr); 595 } 596 ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 597 ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 598 ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 599 PetscFunctionReturn(0); 600 } 601 602 static PetscErrorCode PetscSFBasicPackTypeSetup(PetscSFBasicPack link,MPI_Datatype unit) 603 { 604 PetscErrorCode ierr; 605 PetscBool isInt,isPetscInt,isPetscReal,is2Int,is2PetscInt; 606 PetscInt nPetscIntContig,nPetscRealContig; 607 #if defined(PETSC_HAVE_COMPLEX) 608 PetscBool isPetscComplex; 609 PetscInt nPetscComplexContig; 610 #endif 611 612 PetscFunctionBegin; 613 ierr = MPIPetsc_Type_compare(unit,MPI_INT,&isInt);CHKERRQ(ierr); 614 ierr = MPIPetsc_Type_compare(unit,MPIU_INT,&isPetscInt);CHKERRQ(ierr); 615 ierr = MPIPetsc_Type_compare_contig(unit,MPIU_INT,&nPetscIntContig);CHKERRQ(ierr); 616 ierr = MPIPetsc_Type_compare(unit,MPIU_REAL,&isPetscReal);CHKERRQ(ierr); 617 ierr = MPIPetsc_Type_compare_contig(unit,MPIU_REAL,&nPetscRealContig);CHKERRQ(ierr); 618 #if defined(PETSC_HAVE_COMPLEX) 619 ierr = MPIPetsc_Type_compare(unit,MPIU_COMPLEX,&isPetscComplex);CHKERRQ(ierr); 620 ierr = MPIPetsc_Type_compare_contig(unit,MPIU_COMPLEX,&nPetscComplexContig);CHKERRQ(ierr); 621 #endif 622 ierr = MPIPetsc_Type_compare(unit,MPI_2INT,&is2Int);CHKERRQ(ierr); 623 ierr = MPIPetsc_Type_compare(unit,MPIU_2INT,&is2PetscInt);CHKERRQ(ierr); 624 link->bs = 1; 625 if (isInt) {PackInit_int(link); PackInit_Logical_int(link); PackInit_Bitwise_int(link);} 626 else if (isPetscInt) {PackInit_PetscInt(link); PackInit_Logical_PetscInt(link); PackInit_Bitwise_PetscInt(link);} 627 else if (isPetscReal) {PackInit_PetscReal(link); PackInit_Logical_PetscReal(link);} 628 #if defined(PETSC_HAVE_COMPLEX) 629 else if (isPetscComplex) PackInit_PetscComplex_1(link); 630 #endif 631 else if (is2Int) PackInit_int_int(link); 632 else if (is2PetscInt) PackInit_PetscInt_PetscInt(link); 633 else if (nPetscIntContig) { 634 if (nPetscIntContig%7 == 0) PackInit_PetscInt_7(link); 635 else if (nPetscIntContig%5 == 0) PackInit_PetscInt_5(link); 636 else if (nPetscIntContig%4 == 0) PackInit_PetscInt_4(link); 637 else if (nPetscIntContig%3 == 0) PackInit_PetscInt_3(link); 638 else if (nPetscIntContig%2 == 0) PackInit_PetscInt_2(link); 639 else PackInit_PetscInt(link); 640 link->bs = nPetscIntContig; 641 link->unitbytes *= nPetscIntContig; 642 } else if (nPetscRealContig) { 643 if (nPetscRealContig%7 == 0) PackInit_PetscReal_7(link); 644 else if (nPetscRealContig%5 == 0) PackInit_PetscReal_5(link); 645 else if (nPetscRealContig%4 == 0) PackInit_PetscReal_4(link); 646 else if (nPetscRealContig%3 == 0) PackInit_PetscReal_3(link); 647 else if (nPetscRealContig%2 == 0) PackInit_PetscReal_2(link); 648 else PackInit_PetscReal(link); 649 link->bs = nPetscRealContig; 650 link->unitbytes *= nPetscRealContig; 651 #if defined(PETSC_HAVE_COMPLEX) 652 } else if (nPetscComplexContig) { 653 if (nPetscComplexContig%7 == 0) PackInit_PetscComplex_7(link); 654 else if (nPetscComplexContig%5 == 0) PackInit_PetscComplex_5(link); 655 else if (nPetscComplexContig%4 == 0) PackInit_PetscComplex_4(link); 656 else if (nPetscComplexContig%3 == 0) PackInit_PetscComplex_3(link); 657 else if (nPetscComplexContig%2 == 0) PackInit_PetscComplex_2(link); 658 else PackInit_PetscComplex_1(link); 659 link->bs = nPetscComplexContig; 660 link->unitbytes *= nPetscComplexContig; 661 #endif 662 } else { 663 MPI_Aint lb,bytes; 664 ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRQ(ierr); 665 if (lb != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 666 if (bytes % sizeof(int)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for type size not divisible by %D",sizeof(int)); 667 switch (bytes / sizeof(int)) { 668 case 1: PackInit_block_int_1(link); break; 669 case 2: PackInit_block_int_2(link); break; 670 case 3: PackInit_block_int_3(link); break; 671 case 4: PackInit_block_int_4(link); break; 672 case 5: PackInit_block_int_5(link); break; 673 case 6: PackInit_block_int_6(link); break; 674 case 7: PackInit_block_int_7(link); break; 675 case 8: PackInit_block_int_8(link); break; 676 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for arbitrary block sizes"); 677 } 678 } 679 ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 static PetscErrorCode PetscSFBasicPackGetUnpackOp(PetscSF sf,PetscSFBasicPack link,MPI_Op op,void (**UnpackOp)(PetscInt,PetscInt,const PetscInt*,void*,const void*)) 684 { 685 PetscFunctionBegin; 686 *UnpackOp = NULL; 687 if (op == MPIU_REPLACE) *UnpackOp = link->UnpackInsert; 688 else if (op == MPI_SUM || op == MPIU_SUM) *UnpackOp = link->UnpackAdd; 689 else if (op == MPI_PROD) *UnpackOp = link->UnpackMult; 690 else if (op == MPI_MAX || op == MPIU_MAX) *UnpackOp = link->UnpackMax; 691 else if (op == MPI_MIN || op == MPIU_MIN) *UnpackOp = link->UnpackMin; 692 else if (op == MPI_LAND) *UnpackOp = link->UnpackLAND; 693 else if (op == MPI_BAND) *UnpackOp = link->UnpackBAND; 694 else if (op == MPI_LOR) *UnpackOp = link->UnpackLOR; 695 else if (op == MPI_BOR) *UnpackOp = link->UnpackBOR; 696 else if (op == MPI_LXOR) *UnpackOp = link->UnpackLXOR; 697 else if (op == MPI_BXOR) *UnpackOp = link->UnpackBXOR; 698 else if (op == MPI_MAXLOC) *UnpackOp = link->UnpackMaxloc; 699 else if (op == MPI_MINLOC) *UnpackOp = link->UnpackMinloc; 700 else *UnpackOp = NULL; 701 PetscFunctionReturn(0); 702 } 703 static PetscErrorCode PetscSFBasicPackGetFetchAndOp(PetscSF sf,PetscSFBasicPack link,MPI_Op op,void (**FetchAndOp)(PetscInt,PetscInt,const PetscInt*,void*,void*)) 704 { 705 PetscFunctionBegin; 706 *FetchAndOp = NULL; 707 if (op == MPIU_REPLACE) *FetchAndOp = link->FetchAndInsert; 708 else if (op == MPI_SUM || op == MPIU_SUM) *FetchAndOp = link->FetchAndAdd; 709 else if (op == MPI_MAX || op == MPIU_MAX) *FetchAndOp = link->FetchAndMax; 710 else if (op == MPI_MIN || op == MPIU_MIN) *FetchAndOp = link->FetchAndMin; 711 else if (op == MPI_MAXLOC) *FetchAndOp = link->FetchAndMaxloc; 712 else if (op == MPI_MINLOC) *FetchAndOp = link->FetchAndMinloc; 713 else if (op == MPI_PROD) *FetchAndOp = link->FetchAndMult; 714 else if (op == MPI_LAND) *FetchAndOp = link->FetchAndLAND; 715 else if (op == MPI_BAND) *FetchAndOp = link->FetchAndBAND; 716 else if (op == MPI_LOR) *FetchAndOp = link->FetchAndLOR; 717 else if (op == MPI_BOR) *FetchAndOp = link->FetchAndBOR; 718 else if (op == MPI_LXOR) *FetchAndOp = link->FetchAndLXOR; 719 else if (op == MPI_BXOR) *FetchAndOp = link->FetchAndBXOR; 720 else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for MPI_Op"); 721 PetscFunctionReturn(0); 722 } 723 724 static PetscErrorCode PetscSFBasicPackGetReqs(PetscSF sf,PetscSFBasicPack link,MPI_Request **rootreqs,MPI_Request **leafreqs) 725 { 726 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 727 728 PetscFunctionBegin; 729 if (rootreqs) *rootreqs = link->requests; 730 if (leafreqs) *leafreqs = link->requests + (bas->niranks - bas->ndiranks); 731 PetscFunctionReturn(0); 732 } 733 734 static PetscErrorCode PetscSFBasicPackWaitall(PetscSF sf,PetscSFBasicPack link) 735 { 736 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 ierr = MPI_Waitall(bas->niranks+sf->nranks-(bas->ndiranks+sf->ndranks),link->requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 static PetscErrorCode PetscSFBasicGetRootInfo(PetscSF sf,PetscInt *nrootranks,PetscInt *ndrootranks,const PetscMPIInt **rootranks,const PetscInt **rootoffset,const PetscInt **rootloc) 745 { 746 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 747 748 PetscFunctionBegin; 749 if (nrootranks) *nrootranks = bas->niranks; 750 if (ndrootranks) *ndrootranks = bas->ndiranks; 751 if (rootranks) *rootranks = bas->iranks; 752 if (rootoffset) *rootoffset = bas->ioffset; 753 if (rootloc) *rootloc = bas->irootloc; 754 PetscFunctionReturn(0); 755 } 756 757 static PetscErrorCode PetscSFBasicGetLeafInfo(PetscSF sf,PetscInt *nleafranks,PetscInt *ndleafranks,const PetscMPIInt **leafranks,const PetscInt **leafoffset,const PetscInt **leafloc) 758 { 759 PetscFunctionBegin; 760 if (nleafranks) *nleafranks = sf->nranks; 761 if (ndleafranks) *ndleafranks = sf->ndranks; 762 if (leafranks) *leafranks = sf->ranks; 763 if (leafoffset) *leafoffset = sf->roffset; 764 if (leafloc) *leafloc = sf->rmine; 765 PetscFunctionReturn(0); 766 } 767 768 static PetscErrorCode PetscSFBasicGetPack(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFBasicPack *mylink) 769 { 770 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 771 PetscErrorCode ierr; 772 PetscSFBasicPack link,*p; 773 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks,i; 774 const PetscInt *rootoffset,*leafoffset; 775 776 PetscFunctionBegin; 777 /* Look for types in cache */ 778 for (p=&bas->avail; (link=*p); p=&link->next) { 779 PetscBool match; 780 ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 781 if (match) { 782 *p = link->next; /* Remove from available list */ 783 goto found; 784 } 785 } 786 787 /* Create new composite types for each send rank */ 788 ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 789 ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL);CHKERRQ(ierr); 790 ierr = PetscNew(&link);CHKERRQ(ierr); 791 ierr = PetscSFBasicPackTypeSetup(link,unit);CHKERRQ(ierr); 792 ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr); 793 for (i=0; i<nrootranks; i++) { 794 ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr); 795 } 796 for (i=0; i<nleafranks; i++) { 797 if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */ 798 if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks"); 799 link->leaf[i] = link->root[0]; 800 continue; 801 } 802 ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr); 803 } 804 ierr = PetscCalloc1(nrootranks+nleafranks,&link->requests);CHKERRQ(ierr); 805 806 found: 807 link->key = key; 808 link->next = bas->inuse; 809 bas->inuse = link; 810 811 *mylink = link; 812 PetscFunctionReturn(0); 813 } 814 815 static PetscErrorCode PetscSFBasicGetPackInUse(PetscSF sf,MPI_Datatype unit,const void *key,PetscCopyMode cmode,PetscSFBasicPack *mylink) 816 { 817 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 818 PetscErrorCode ierr; 819 PetscSFBasicPack link,*p; 820 821 PetscFunctionBegin; 822 /* Look for types in cache */ 823 for (p=&bas->inuse; (link=*p); p=&link->next) { 824 PetscBool match; 825 ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 826 if (match && (key == link->key)) { 827 switch (cmode) { 828 case PETSC_OWN_POINTER: *p = link->next; break; /* Remove from inuse list */ 829 case PETSC_USE_POINTER: break; 830 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"invalid cmode"); 831 } 832 *mylink = link; 833 PetscFunctionReturn(0); 834 } 835 } 836 SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Could not find pack"); 837 PetscFunctionReturn(0); 838 } 839 840 static PetscErrorCode PetscSFBasicReclaimPack(PetscSF sf,PetscSFBasicPack *link) 841 { 842 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 843 844 PetscFunctionBegin; 845 (*link)->key = NULL; 846 (*link)->next = bas->avail; 847 bas->avail = *link; 848 *link = NULL; 849 PetscFunctionReturn(0); 850 } 851 852 static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf) 853 { 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr); 858 ierr = PetscOptionsTail();CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 static PetscErrorCode PetscSFReset_Basic(PetscSF sf) 863 { 864 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 865 PetscErrorCode ierr; 866 PetscSFBasicPack link,next; 867 868 PetscFunctionBegin; 869 ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 870 ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 871 if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 872 for (link=bas->avail; link; link=next) { 873 PetscInt i; 874 next = link->next; 875 #if defined(PETSC_HAVE_MPI_TYPE_DUP) 876 ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr); 877 #endif 878 for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);} 879 for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */ 880 ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr); 881 ierr = PetscFree(link->requests);CHKERRQ(ierr); 882 ierr = PetscFree(link);CHKERRQ(ierr); 883 } 884 bas->avail = NULL; 885 PetscFunctionReturn(0); 886 } 887 888 static PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 889 { 890 PetscErrorCode ierr; 891 892 PetscFunctionBegin; 893 ierr = PetscFree(sf->data);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 static PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 898 { 899 /* PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; */ 900 PetscErrorCode ierr; 901 PetscBool iascii; 902 903 PetscFunctionBegin; 904 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 905 if (iascii) { 906 ierr = PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 /* Send from roots to leaves */ 912 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 913 { 914 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 915 PetscErrorCode ierr; 916 PetscSFBasicPack link; 917 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 918 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 919 const PetscMPIInt *rootranks,*leafranks; 920 MPI_Request *rootreqs,*leafreqs; 921 922 PetscFunctionBegin; 923 ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 924 ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc);CHKERRQ(ierr); 925 ierr = PetscSFBasicGetPack(sf,unit,rootdata,&link);CHKERRQ(ierr); 926 927 ierr = PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);CHKERRQ(ierr); 928 /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */ 929 for (i=ndleafranks; i<nleafranks; i++) { 930 PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 931 ierr = MPI_Irecv(link->leaf[i],n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i-ndleafranks]);CHKERRQ(ierr); 932 } 933 /* Pack and send root data */ 934 for (i=0; i<nrootranks; i++) { 935 PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 936 void *packstart = link->root[i]; 937 (*link->Pack)(n,link->bs,rootloc+rootoffset[i],rootdata,packstart); 938 if (i < ndrootranks) continue; /* shared memory */ 939 ierr = MPI_Isend(packstart,n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i-ndrootranks]);CHKERRQ(ierr); 940 } 941 PetscFunctionReturn(0); 942 } 943 944 PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 945 { 946 PetscErrorCode ierr; 947 PetscSFBasicPack link; 948 PetscInt i,nleafranks,ndleafranks; 949 const PetscInt *leafoffset,*leafloc; 950 951 PetscFunctionBegin; 952 ierr = PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 953 ierr = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr); 954 ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc);CHKERRQ(ierr); 955 for (i=0; i<nleafranks; i++) { 956 PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 957 const void *packstart = link->leaf[i]; 958 (*link->UnpackInsert)(n,link->bs,leafloc+leafoffset[i],leafdata,packstart); 959 } 960 ierr = PetscSFBasicReclaimPack(sf,&link);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 /* leaf -> root with reduction */ 965 PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 966 { 967 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 968 PetscSFBasicPack link; 969 PetscErrorCode ierr; 970 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 971 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 972 const PetscMPIInt *rootranks,*leafranks; 973 MPI_Request *rootreqs,*leafreqs; 974 975 PetscFunctionBegin; 976 ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 977 ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc);CHKERRQ(ierr); 978 ierr = PetscSFBasicGetPack(sf,unit,rootdata,&link);CHKERRQ(ierr); 979 980 ierr = PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);CHKERRQ(ierr); 981 /* Eagerly post root receives for non-distinguished ranks */ 982 for (i=ndrootranks; i<nrootranks; i++) { 983 PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 984 ierr = MPI_Irecv(link->root[i],n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i-ndrootranks]);CHKERRQ(ierr); 985 } 986 /* Pack and send leaf data */ 987 for (i=0; i<nleafranks; i++) { 988 PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 989 void *packstart = link->leaf[i]; 990 (*link->Pack)(n,link->bs,leafloc+leafoffset[i],leafdata,packstart); 991 if (i < ndleafranks) continue; /* shared memory */ 992 ierr = MPI_Isend(packstart,n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i-ndleafranks]);CHKERRQ(ierr); 993 } 994 PetscFunctionReturn(0); 995 } 996 997 static PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 998 { 999 void (*UnpackOp)(PetscInt,PetscInt,const PetscInt*,void*,const void*); 1000 PetscErrorCode ierr; 1001 PetscSFBasicPack link; 1002 PetscInt i,nrootranks; 1003 PetscMPIInt typesize = -1; 1004 const PetscInt *rootoffset,*rootloc; 1005 1006 PetscFunctionBegin; 1007 ierr = PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 1008 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 1009 ierr = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr); 1010 ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr); 1011 ierr = PetscSFBasicPackGetUnpackOp(sf,link,op,&UnpackOp);CHKERRQ(ierr); 1012 if (UnpackOp) { 1013 typesize = link->unitbytes; 1014 } 1015 else { 1016 ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); 1017 } 1018 for (i=0; i<nrootranks; i++) { 1019 PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 1020 char *packstart = (char *) link->root[i]; 1021 1022 if (UnpackOp) { 1023 (*UnpackOp)(n,link->bs,rootloc+rootoffset[i],rootdata,(const void *)packstart); 1024 } 1025 #if PETSC_HAVE_MPI_REDUCE_LOCAL 1026 else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 1027 PetscInt j; 1028 1029 for (j = 0; j < n; j++) { 1030 ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); 1031 } 1032 } 1033 #else 1034 else { 1035 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 1036 } 1037 #endif 1038 } 1039 ierr = PetscSFBasicReclaimPack(sf,&link);CHKERRQ(ierr); 1040 PetscFunctionReturn(0); 1041 } 1042 1043 static PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1044 { 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = PetscSFReduceBegin_Basic(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1053 { 1054 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 1055 void (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,void*,void*); 1056 PetscErrorCode ierr; 1057 PetscSFBasicPack link; 1058 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 1059 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 1060 const PetscMPIInt *rootranks,*leafranks; 1061 MPI_Request *rootreqs,*leafreqs; 1062 1063 PetscFunctionBegin; 1064 ierr = PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 1065 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 1066 ierr = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr); 1067 ierr = PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 1068 ierr = PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc);CHKERRQ(ierr); 1069 ierr = PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);CHKERRQ(ierr); 1070 /* Post leaf receives */ 1071 for (i=ndleafranks; i<nleafranks; i++) { 1072 PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 1073 ierr = MPI_Irecv(link->leaf[i],n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i-ndleafranks]);CHKERRQ(ierr); 1074 } 1075 /* Process local fetch-and-op, post root sends */ 1076 ierr = PetscSFBasicPackGetFetchAndOp(sf,link,op,&FetchAndOp);CHKERRQ(ierr); 1077 for (i=0; i<nrootranks; i++) { 1078 PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 1079 void *packstart = link->root[i]; 1080 1081 (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],rootdata,packstart); 1082 if (i < ndrootranks) continue; /* shared memory */ 1083 ierr = MPI_Isend(packstart,n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i-ndrootranks]);CHKERRQ(ierr); 1084 } 1085 ierr = PetscSFBasicPackWaitall(sf,link);CHKERRQ(ierr); 1086 for (i=0; i<nleafranks; i++) { 1087 PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 1088 const void *packstart = link->leaf[i]; 1089 (*link->UnpackInsert)(n,link->bs,leafloc+leafoffset[i],leafupdate,packstart); 1090 } 1091 ierr = PetscSFBasicReclaimPack(sf,&link);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 1096 { 1097 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 sf->ops->SetUp = PetscSFSetUp_Basic; 1102 sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic; 1103 sf->ops->Reset = PetscSFReset_Basic; 1104 sf->ops->Destroy = PetscSFDestroy_Basic; 1105 sf->ops->View = PetscSFView_Basic; 1106 sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 1107 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 1108 sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 1109 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 1110 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 1111 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 1112 1113 ierr = PetscNewLog(sf,&bas);CHKERRQ(ierr); 1114 sf->data = (void*)bas; 1115 PetscFunctionReturn(0); 1116 } 1117