#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/

typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

typedef struct {
  PetscSFWindowSyncType   sync;        /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;        /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;        /* List of active windows */
  PetscSFWindowFlavorType flavor;      /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;       /* Auxiliary SF (one root per process) used to broadcast per-root metadata such as dynamic window addresses */
  MPI_Info                info;        /* MPI_Info used when creating/allocating windows; MPI_INFO_NULL unless set via PetscSFWindowSetInfo() */
  MPI_Comm                window_comm; /* Communicator windows are created on; excludes processes with no roots and no leaves when any exist */
  PetscBool               is_empty;    /* PETSC_TRUE when this process has no roots and no leaves (it is excluded from window_comm) */
  PetscMPIInt            *wcommranks;  /* Root ranks of this SF translated to the rank numbering of window_comm */
} PetscSF_Window;

struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* Unit type this cache entry was built for (duplicated) */
  MPI_Datatype   *mine;   /* Per-rank indexed-block type over the local leaf buffer */
  MPI_Datatype   *remote; /* Per-rank indexed-block type over the remote root buffer */
  PetscSFDataLink next;
};

struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* Communication currently in progress on this window */
  MPI_Aint                bytes;           /* Size of the unit datatype the window was sized for */
  void                   *addr;            /* Address exposed through the window (user rootdata or MPI-allocated buffer) */
  void                   *rootdata;        /* User root array associated with the window */
  void                   *leafdata;        /* User leaf array; only used to uniquely identify the window */
  MPI_Win                 win;
  MPI_Request            *reqs;            /* Per-root-rank requests, allocated for PETSCSF_WINDOW_SYNC_LOCK only */
  PetscSFWindowFlavorType flavor;
  MPI_Aint               *dyn_target_addr; /* Per-root-rank target displacements, PETSCSF_WINDOW_FLAVOR_DYNAMIC only */
  PetscBool               epoch;           /* An access/exposure epoch is currently open */
  PetscBool               persistent;      /* Registered via PetscSFRegisterPersistent(); survives PetscSFRestoreWindow() */
  PetscSFWinLink          next;
};

const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};

/* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce).
   Translate the PETSc-provided composite-capable ops to the built-in equivalents accepted by MPI_Accumulate; ops with no
   translation are passed through unchanged. */
static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
{
  PetscFunctionBegin;
  if (*op == MPIU_SUM) *op = MPI_SUM;
  else if (*op == MPIU_MAX) *op = MPI_MAX;
  else if (*op == MPIU_MIN) *op = MPI_MIN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

  Not Collective

  Input Parameters:
+ sf   - star forest of type `PETSCSFWINDOW`
- unit - data type for each node

  Output Parameters:
+ localtypes  - types describing part of local leaf buffer referencing each remote rank
- remotetypes - types describing part of remote root buffer referenced for each remote rank

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link;
  PetscInt        i, nranks;
  const PetscInt *roffset, *rmine, *rremote;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link = w->link; link; link = link->next) {
    PetscBool match;
    PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Create new composite types for each send rank */
  PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, &roffset, &rmine, &rremote));
  PetscCall(PetscNew(&link));
  PetscCallMPI(MPI_Type_dup(unit, &link->unit));
  PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
  for (i = 0; i < nranks; i++) {
    PetscInt rcount = roffset[i + 1] - roffset[i];
    /* NOTE(review): these shadow the outer const PetscInt *rmine/*rremote obtained above */
    PetscMPIInt *rmine, *rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt == int here, so the index arrays can be used in place */
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* 64-bit PetscInt: copy-convert the index arrays to MPI's int */
    PetscInt j;
    PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
    for (j = 0; j < rcount; j++) {
      PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
      PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
    }
#endif

    /* NOTE(review): rcount is PetscInt but MPI_Type_create_indexed_block takes an int count;
       with 64-bit indices this narrows implicitly — consider PetscMPIIntCast. TODO confirm */
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
#if defined(PETSC_USE_64BIT_INDICES)
    PetscCall(PetscFree2(rmine, rremote));
#endif
    PetscCallMPI(MPI_Type_commit(&link->mine[i]));
    PetscCallMPI(MPI_Type_commit(&link->remote[i]));
  }
  /* Prepend to the cache list */
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation

  Logically Collective

  Input Parameters:
+ sf     - star forest for communication of type `PETSCSFWINDOW`
- flavor - flavor type

  Options Database Key:
. -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see `PetscSFWindowFlavorType`)

  Level: advanced

  Notes:
  Windows reuse follows these rules\:
.vb
     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       PetscSFRegisterPersistent(sf,rootdata1,leafdata);
       for i=1 to K
         PetscSFOperationBegin(sf,rootdata1,leafdata);
         PetscSFOperationEnd(sf,rootdata1,leafdata);
         ...
         PetscSFOperationBegin(sf,rootdata1,leafdata);
         PetscSFOperationEnd(sf,rootdata1,leafdata);
       endfor
       PetscSFDeregisterPersistent(sf,rootdata1,leafdata);

       The following pattern will instead raise an error
         PetscSFOperationBegin(sf,rootdata1,leafdata);
         PetscSFOperationEnd(sf,rootdata1,leafdata);
         PetscSFOperationBegin(sf,rank ? rootdata1 : rootdata2,leafdata);
         PetscSFOperationEnd(sf,rank ? rootdata1 : rootdata2,leafdata);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
.ve

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, flavor, 2);
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowSetFlavorType() */
static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->flavor = flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication

  Logically Collective

  Input Parameter:
. sf - star forest for communication of type `PETSCSFWINDOW`

  Output Parameter:
. flavor - flavor type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(flavor, 2);
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowGetFlavorType() */
static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *flavor = w->flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- sync - synchronization type

  Options Database Key:
. -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, sync, 2);
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowSetSyncType() */
static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->sync = sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. sync - synchronization type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(sync, 2);
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowGetSyncType() */
static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *sync = w->sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent windows allocation

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- info - `MPI_Info` handle

  Level: advanced

  Note:
  The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowSetInfo(); frees any previously duplicated handle first */
static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetInfo - Get the `MPI_Info` handle used for windows allocation

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. info - `MPI_Info` handle

  Level: advanced

  Note:
  If `PetscSFWindowSetInfo()` has not be called, this returns `MPI_INFO_NULL`

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(info, 2);
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowGetInfo() */
static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *info = w->info;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an auxiliary SF with a single root on every process and one leaf per root rank of sf,
   used to broadcast per-process scalars (window base addresses, translated ranks) to the
   processes that reference them. The graph is (nroots=1, leaves -> (root rank, index 0)). */
static PetscErrorCode PetscSFWindowCreateDynamicSF(PetscSF sf, PetscSF *dynsf)
{
  PetscSFNode *remotes;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(sf->nranks, &remotes));
  for (PetscInt i = 0; i < sf->nranks; i++) {
    remotes[i].rank  = sf->ranks[i];
    remotes[i].index = 0;
  }
  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, dynsf));
  PetscCall(PetscSFSetType(*dynsf, PETSCSFBASIC)); /* break recursion */
  PetscCall(PetscSFSetGraph(*dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Attach rootdata to a dynamic window and broadcast its address to every process that
   targets this process, filling link->dyn_target_addr. Requires w->dynsf (set up in
   PetscSFSetUp_Window); errors out when dynamic windows are unsupported by the MPI. */
static PetscErrorCode PetscSFWindowAttach(PetscSF sf, PetscSFWinLink link, void *rootdata, size_t wsize)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  {
    PetscSF_Window *w = (PetscSF_Window *)sf->data;
    MPI_Comm        wcomm;
    MPI_Aint        winaddr;
    void           *addr = rootdata;
    PetscInt        nranks;

    // some Open MPI versions do not support MPI_Win_attach(win,NULL,0);
    wcomm = w->window_comm;
    if (addr != NULL) PetscCallMPI(MPI_Win_attach(link->win, addr, wsize));
    link->addr = addr;
    PetscCheck(w->dynsf, wcomm, PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
    PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
    PetscCallMPI(MPI_Get_address(addr, &winaddr));
    if (!link->dyn_target_addr) PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
    PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
  }
#else
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "dynamic windows not supported");
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFGetWindow - Get a window for use with a given data type

  Collective

  Input Parameters:
+ sf          - star forest
. unit        - data type
. rootdata    - array to be sent
. leafdata    - only used to help uniquely identify windows
. sync        - type of synchronization `PetscSFWindowSyncType`
. epoch       - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
. fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
. postassert  - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
- startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`

  Output Parameters:
+ target_disp - target_disp argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
. reqs        - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
- win         - window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *rootdata, void *leafdata, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        bytes;
  PetscSFWinLink  link;
  PetscBool       reuse = PETSC_FALSE, update = PETSC_FALSE;
  MPI_Aint        wsize;
  MPI_Comm        wcomm;
  PetscBool       is_empty;

  PetscFunctionBegin;
  PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
  wsize    = (MPI_Aint)(bytes * sf->nroots);
  wcomm    = w->window_comm;
  is_empty = w->is_empty;
  if (is_empty) {
    /* this process is excluded from window_comm: hand back null outputs */
    if (target_disp) *target_disp = NULL;
    if (reqs) *reqs = NULL;
    *win = MPI_WIN_NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  if (PetscDefined(HAVE_MPI_FEATURE_DYNAMIC_WINDOW) && w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
    // first search for a persistent window
    for (link = w->wins; reuse && link; link = link->next) {
      PetscBool match;

      if (!link->persistent) continue;
      match = (link->flavor == w->flavor && link->rootdata == rootdata && link->leafdata == leafdata) ? PETSC_TRUE : PETSC_FALSE;
      if (PetscDefined(USE_DEBUG)) {
        /* MAX of (match, -match): equal results iff every process agreed on match */
        PetscInt matches[2];
        PetscInt all_matches[2];

        matches[0] = match ? 1 : 0;
        matches[1] = match ? -1 : 0;
        PetscCallMPI(MPIU_Allreduce(matches, all_matches, 2, MPIU_INT, MPI_MAX, wcomm));
        all_matches[1] = -all_matches[1];
        PetscCheck(all_matches[0] == all_matches[1], wcomm, PETSC_ERR_ARG_INCOMP,
                   "Inconsistent use across MPI processes of persistent leaf and root data registered with PetscSFRegisterPersistent().\n"
                   "Either the persistent data was changed on a subset of processes (which is not allowed),\n"
                   "or persistent data was not deregistered with PetscSFDeregisterPersistent() before being deallocated");
      }
      if (match) {
        PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication already in progress on persistent root and leaf data");
        PetscCheck(!epoch || !link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication epoch already open for window");
        PetscCheck(bytes == link->bytes, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Wrong data type for persistent root and leaf data");
        *win = link->win;
        goto found;
      }
    }
  }
  for (link = w->wins; reuse && link; link = link->next) {
    if (w->flavor != link->flavor) continue;
    /* an existing window can be used (1) if it is not in use, (2) if we are
       not asking to start an epoch or it does not have an already started
       epoch, and (3) if it is the right size */
    if (!link->inuse && (!epoch || !link->epoch) && bytes == (MPI_Aint)link->bytes) {
      if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
        PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize));
      } else {
        update = PETSC_TRUE;
      }
      link->rootdata = rootdata;
      link->leafdata = leafdata;
      PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
      *win = link->win;
      goto found;
    }
  }

  /* No reusable window: create a new one according to the current flavor */
  PetscCall(PetscNew(&link));
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  link->rootdata        = rootdata;
  link->leafdata        = leafdata;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    PetscCallMPI(MPI_Win_create(rootdata, wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->win));
    link->addr = rootdata;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win));
    PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize));
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win));
    update = PETSC_TRUE;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win));
    update = PETSC_TRUE;
    break;
#endif
  default:
    SETERRQ(wcomm, PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guarantees memory-synchronization */
    PetscMPIInt rank;

    PetscCallMPI(MPI_Comm_rank(wcomm, &rank));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
    PetscCall(PetscMemcpy(link->addr, rootdata, sf->nroots * bytes));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_unlock(rank, *win));
      PetscCallMPI(MPI_Win_fence(0, *win));
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* Open MPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
      if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
    } break;
    default:
      SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFFindWindow - Finds a window that is already in use

  Not Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. rootdata - array with which the window is associated
- leafdata - only used to help uniquely identify windows

  Output Parameters:
+ win  - window
- reqs - outstanding requests associated to the window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
*/
static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata, MPI_Win *win, MPI_Request **reqs)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink  link;
  PetscBool       is_empty;
  MPI_Aint        bytes;

  PetscFunctionBegin;
  PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
  *win     = MPI_WIN_NULL;
  is_empty = w->is_empty;
  if (is_empty) {
    *reqs = NULL;
    *win  = MPI_WIN_NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* identify the window by (rootdata, leafdata, unit size) */
  for (link = w->wins; link; link = link->next) {
    if (rootdata == link->rootdata && leafdata == link->leafdata && bytes == link->bytes) {
      PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)w->window_comm));
      *win  = link->win;
      *reqs = link->reqs;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
}

/*
  PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`

  Collective

  Input Parameters:
+ sf          - star forest
. unit        - data type
. array       - array associated with window
. sync        - type of synchronization `PetscSFWindowSyncType`
. epoch       - close an epoch, must match argument to `PetscSFGetWindow()`
. fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
. update      - if we have to update the local window array
- win         - window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
{
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  MPI_Aint                bytes;
  MPI_Comm                wcomm;

  PetscFunctionBegin;
  if (*win == MPI_WIN_NULL) PetscFunctionReturn(PETSC_SUCCESS);
  wcomm = w->window_comm;
  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      PetscCheck(array == link->rootdata, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
      if (epoch != link->epoch) {
        PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      }
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm));
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* Open MPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (osize) PetscCallMPI(MPI_Win_complete(*win));
      if (isize) PetscCallMPI(MPI_Win_wait(*win));
    } break;
    default:
      SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  /* non-persistent dynamic windows detach the user array; persistent ones stay attached */
  if (link->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC && !link->persistent) {
    if (link->addr != NULL) PetscCallMPI(MPI_Win_detach(link->win, link->addr));
    link->addr = NULL;
  }
#endif
  if (update) {
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
    /* copy the (possibly MPI-allocated) window buffer back to the user root array */
    PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  if (!link->persistent) {
    link->rootdata = NULL;
    link->leafdata = NULL;
  }
  if (!reuse) {
    PetscCall(PetscFree(link->dyn_target_addr));
    PetscCall(PetscFree(link->reqs));
    PetscCallMPI(MPI_Win_free(&link->win));
    PetscCall(PetscFree(link));
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the window communicator (splitting off processes with no roots and no leaves),
   the translated root-rank table w->wcommranks, and, for the DYNAMIC flavor, the
   auxiliary SF used to broadcast window addresses. */
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  if (w->window_comm == MPI_COMM_NULL) {
    PetscInt    nroots, nleaves, nranks;
    PetscBool   has_empty;
    PetscMPIInt wcommrank;
    PetscSF     dynsf_full = NULL;

    if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full));

    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
    has_empty = (nroots == 0 && nleaves == 0) ? PETSC_TRUE : PETSC_FALSE;
    nranks    = sf->nranks;
    PetscCall(PetscMalloc1(nranks, &w->wcommranks));
    w->is_empty = has_empty;
    /* NOTE(review): plain MPI_Allreduce here while the rest of the file uses MPIU_Allreduce — verify intended */
    PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &has_empty, 1, MPIU_BOOL, MPI_LOR, comm));
    if (has_empty) {
      PetscMPIInt  rank;
      MPI_Comm     raw_comm;
      PetscSFNode *remotes;

      /* split empty processes into their own communicator; windows live on the non-empty one */
      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      PetscCallMPI(MPI_Comm_split(comm, w->is_empty ? 1 : 0, rank, &raw_comm));
      PetscCall(PetscCommDuplicate(raw_comm, &w->window_comm, NULL));
      PetscCallMPI(MPI_Comm_free(&raw_comm));

      /* translate root ranks from comm numbering to window_comm numbering */
      PetscCallMPI(MPI_Comm_rank(w->window_comm, &wcommrank));
      if (!dynsf_full) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full));
      PetscCall(PetscSFBcastBegin(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE));

      if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) {
        PetscCall(PetscSFCreate(w->window_comm, &w->dynsf));
        PetscCall(PetscSFSetType(w->dynsf, PETSCSFBASIC)); /* break recursion */
        PetscCall(PetscMalloc1(sf->nranks, &remotes));
        for (PetscInt i = 0; i < sf->nranks; i++) {
          remotes[i].rank  = w->wcommranks[i];
          remotes[i].index = 0;
        }
        PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
      }
    } else {
      /* no empty processes: window_comm is just (a duplicate of) the SF communicator */
      PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sf), &w->window_comm, NULL));
      PetscCall(PetscArraycpy(w->wcommranks, sf->ranks, nranks));
      PetscCall(PetscObjectReference((PetscObject)dynsf_full));
      w->dynsf = dynsf_full;
    }
    if (w->dynsf) PetscCall(PetscSFSetUp(w->dynsf));
    PetscCall(PetscSFDestroy(&dynsf_full));
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* eagerly build the in/out groups needed by post/start */
    PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
    /* fallthrough */
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process -sf_window_sync and -sf_window_flavor options */
static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
{
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
  PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
  PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free the datatype cache, all windows, the dynamic SF, the info handle, and the window communicator */
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;
  MPI_Comm        wcomm;
  PetscBool       is_empty;

  PetscFunctionBegin;
  for (link = w->link; link; link = next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i = 0; i < sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine, link->remote));
    PetscCall(PetscFree(link));
  }
  w->link  = NULL;
  wcomm    = w->window_comm;
  is_empty = w->is_empty;
  if (!is_empty) {
    for (wlink = w->wins; wlink; wlink = wnext) {
      wnext = wlink->next;
      PetscCheck(!wlink->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
      PetscCall(PetscFree(wlink->dyn_target_addr));
      PetscCall(PetscFree(wlink->reqs));
      PetscCallMPI(MPI_Win_free(&wlink->win));
      PetscCall(PetscFree(wlink));
    }
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  PetscCall(PetscCommDestroy(&w->window_comm));
  PetscCall(PetscFree(w->wcommranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Create a persistent dynamic window for (rootdata, leafdata): the window is created and the
   root array attached once, then reused by PetscSFGetWindow() until deregistered.
   No-op for flavors other than PETSCSF_WINDOW_FLAVOR_DYNAMIC or when this process is empty. */
static PetscErrorCode PetscSFRegisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        bytes, wsize;
  PetscBool       is_empty;
  PetscSFWinLink  link;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
  wsize    = (MPI_Aint)(bytes * sf->nroots);
  is_empty = w->is_empty;
  if (is_empty) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscNew(&link));
  link->flavor = w->flavor;
  link->next   = w->wins;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  {
    MPI_Comm wcomm = w->window_comm;
    PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win));
  }
#endif
  PetscCall(PetscSFWindowAttach(sf, link, (void *)rootdata, wsize));
  link->rootdata   = (void *)rootdata;
  link->leafdata   = (void *)leafdata;
  link->bytes      = bytes;
  link->epoch      = PETSC_FALSE;
  link->inuse      = PETSC_FALSE;
  link->persistent = PETSC_TRUE;
  w->wins          = link;
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Detach and destroy the persistent window registered for (rootdata, leafdata), if any.
   Errors if the window is still in use or has an open epoch. */
static PetscErrorCode PetscSFDeregisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        bytes;
  MPI_Comm        wcomm;
  PetscBool       is_empty;
  PetscSFWinLink *p;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes));
  wcomm    = w->window_comm;
  is_empty = w->is_empty;
  if (is_empty) PetscFunctionReturn(PETSC_SUCCESS);
  for (p = &w->wins; *p; p = &(*p)->next) {
    PetscSFWinLink link = *p;
    if (link->flavor == w->flavor && link->persistent && link->rootdata == rootdata && link->leafdata == leafdata && link->bytes == bytes) {
      PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window when communication is still in progress");
      PetscCheck(!link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window with an unconcluded epoch");
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
      PetscCallMPI(MPI_Win_detach(link->win, link->addr));
      link->addr = NULL;
#endif
      PetscCall(PetscFree(link->dyn_target_addr));
      PetscCall(PetscFree(link->reqs));
      PetscCallMPI(MPI_Win_free(&link->win));
      *p = link->next; /* unlink from the list */
      PetscCall(PetscFree(link));
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset state and remove all composed query/set methods */
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Window(sf));
  PetscCall(PetscFree(sf->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. 
Ordered key-value pairs follow:\n", nkeys)); 970 for (k = 0; k < nkeys; k++) { 971 PetscMPIInt flag; 972 973 PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key)); 974 PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag)); 975 PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key); 976 PetscCall(PetscViewerASCIIPrintf(viewer, " %s = %s\n", key, value)); 977 } 978 } else { 979 PetscCall(PetscViewerASCIIPrintf(viewer, " current info=MPI_INFO_NULL\n")); 980 } 981 } 982 } 983 PetscFunctionReturn(PETSC_SUCCESS); 984 } 985 986 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf) 987 { 988 PetscSF_Window *w = (PetscSF_Window *)sf->data; 989 PetscSFWindowSyncType synctype; 990 991 PetscFunctionBegin; 992 synctype = w->sync; 993 /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */ 994 if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK; 995 PetscCall(PetscSFWindowSetSyncType(newsf, synctype)); 996 PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor)); 997 PetscCall(PetscSFWindowSetInfo(newsf, w->info)); 998 PetscFunctionReturn(PETSC_SUCCESS); 999 } 1000 1001 static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1002 { 1003 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1004 PetscInt i, nranks; 1005 const PetscMPIInt *ranks; 1006 const MPI_Aint *target_disp; 1007 const MPI_Datatype *mine, *remote; 1008 MPI_Request *reqs; 1009 MPI_Win win; 1010 1011 PetscFunctionBegin; 1012 PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented"); 1013 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1014 PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote)); 1015 PetscCall(PetscSFGetWindow(sf, unit, 
(void *)rootdata, leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win)); 1016 ranks = w->wcommranks; 1017 for (i = 0; i < nranks; i++) { 1018 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 1019 1020 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 1021 PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win)); 1022 #if defined(PETSC_HAVE_MPI_RGET) 1023 PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i])); 1024 #else 1025 PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 1026 #endif 1027 } else { 1028 PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 1029 } 1030 } 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 static PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1035 { 1036 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1037 MPI_Win win; 1038 MPI_Request *reqs = NULL; 1039 1040 PetscFunctionBegin; 1041 PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs)); 1042 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 1043 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 1044 PetscInt i, nranks; 1045 const PetscMPIInt *ranks; 1046 1047 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1048 ranks = w->wcommranks; 1049 for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1050 } 1051 PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win)); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 static PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1056 { 1057 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1058 PetscInt i, nranks; 1059 const 
PetscMPIInt *ranks; 1060 const MPI_Aint *target_disp; 1061 const MPI_Datatype *mine, *remote; 1062 MPI_Win win; 1063 1064 PetscFunctionBegin; 1065 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1066 PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote)); 1067 PetscCall(PetscSFWindowOpTranslate(&op)); 1068 PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win)); 1069 ranks = w->wcommranks; 1070 for (i = 0; i < nranks; i++) { 1071 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 1072 1073 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win)); 1074 PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 1075 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1076 } 1077 PetscFunctionReturn(PETSC_SUCCESS); 1078 } 1079 1080 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1081 { 1082 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1083 MPI_Win win; 1084 MPI_Request *reqs = NULL; 1085 1086 PetscFunctionBegin; 1087 PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs)); 1088 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 1089 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win)); 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) 1094 { 1095 PetscInt i, nranks; 1096 const PetscMPIInt *ranks; 1097 const MPI_Datatype *mine, *remote; 1098 const MPI_Aint *target_disp; 1099 MPI_Win win; 1100 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1101 #if 
!defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1102 PetscSFWindowFlavorType oldf; 1103 #endif 1104 1105 PetscFunctionBegin; 1106 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1107 PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote)); 1108 PetscCall(PetscSFWindowOpTranslate(&op)); 1109 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1110 /* FetchAndOp without MPI_Get_Accumulate requires locking. 1111 we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */ 1112 oldf = w->flavor; 1113 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 1114 PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win)); 1115 #else 1116 PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win)); 1117 #endif 1118 ranks = w->wcommranks; 1119 for (i = 0; i < nranks; i++) { 1120 MPI_Aint tdp = target_disp ? 
target_disp[i] : 0; 1121 1122 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1123 PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win)); 1124 PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 1125 PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 1126 PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1127 #else 1128 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win)); 1129 PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 1130 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1131 #endif 1132 } 1133 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1134 w->flavor = oldf; 1135 #endif 1136 PetscFunctionReturn(PETSC_SUCCESS); 1137 } 1138 1139 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1140 { 1141 MPI_Win win; 1142 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1143 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1144 #endif 1145 MPI_Request *reqs = NULL; 1146 1147 PetscFunctionBegin; 1148 PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs)); 1149 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 1150 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1151 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win)); 1152 #else 1153 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win)); 1154 #endif 1155 PetscFunctionReturn(PETSC_SUCCESS); 1156 } 1157 1158 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf) 1159 { 1160 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1161 1162 PetscFunctionBegin; 1163 sf->ops->SetUp = PetscSFSetUp_Window; 1164 sf->ops->SetFromOptions = 
PetscSFSetFromOptions_Window; 1165 sf->ops->Reset = PetscSFReset_Window; 1166 sf->ops->Destroy = PetscSFDestroy_Window; 1167 sf->ops->View = PetscSFView_Window; 1168 sf->ops->Duplicate = PetscSFDuplicate_Window; 1169 sf->ops->BcastBegin = PetscSFBcastBegin_Window; 1170 sf->ops->BcastEnd = PetscSFBcastEnd_Window; 1171 sf->ops->ReduceBegin = PetscSFReduceBegin_Window; 1172 sf->ops->ReduceEnd = PetscSFReduceEnd_Window; 1173 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window; 1174 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window; 1175 1176 PetscCall(PetscNew(&w)); 1177 sf->data = (void *)w; 1178 w->sync = PETSCSF_WINDOW_SYNC_FENCE; 1179 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 1180 w->info = MPI_INFO_NULL; 1181 w->window_comm = MPI_COMM_NULL; 1182 1183 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window)); 1184 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window)); 1185 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window)); 1186 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window)); 1188 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window)); 1189 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", PetscSFRegisterPersistent_Window)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", PetscSFDeregisterPersistent_Window)); 1191 1192 #if defined(PETSC_HAVE_OPENMPI) 1193 #if PETSC_PKG_OPENMPI_VERSION_LE(1, 6, 0) 1194 { 1195 PetscBool ackbug = PETSC_FALSE; 1196 PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL)); 1197 if 
(ackbug) { 1198 PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n")); 1199 } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed"); 1200 } 1201 #endif 1202 #endif 1203 PetscFunctionReturn(PETSC_SUCCESS); 1204 } 1205