1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 3 typedef struct _n_PetscSFDataLink *PetscSFDataLink; 4 typedef struct _n_PetscSFWinLink *PetscSFWinLink; 5 6 typedef struct { 7 PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */ 8 PetscSFDataLink link; /* List of MPI data types, lazily constructed for each data type */ 9 PetscSFWinLink wins; /* List of active windows */ 10 PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */ 11 PetscSF dynsf; 12 MPI_Info info; 13 MPI_Comm window_comm; 14 PetscBool is_empty; 15 PetscMPIInt *wcommranks; 16 } PetscSF_Window; 17 18 struct _n_PetscSFDataLink { 19 MPI_Datatype unit; 20 MPI_Datatype *mine; 21 MPI_Datatype *remote; 22 PetscSFDataLink next; 23 }; 24 25 struct _n_PetscSFWinLink { 26 PetscBool inuse; 27 MPI_Aint bytes; 28 void *addr; 29 void *rootdata; 30 void *leafdata; 31 MPI_Win win; 32 MPI_Request *reqs; 33 PetscSFWindowFlavorType flavor; 34 MPI_Aint *dyn_target_addr; 35 PetscBool epoch; 36 PetscBool persistent; 37 PetscSFWinLink next; 38 }; 39 40 const char *const PetscSFWindowSyncTypes[] = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL}; 41 const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL}; 42 43 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */ 44 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op) 45 { 46 PetscFunctionBegin; 47 if (*op == MPIU_SUM) *op = MPI_SUM; 48 else if (*op == MPIU_MAX) *op = MPI_MAX; 49 else if (*op == MPIU_MIN) *op = MPI_MIN; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 /* 54 PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank 55 56 Not Collective 57 58 Input Parameters: 59 + sf - star forest of type `PETSCSFWINDOW` 60 - unit - data type for each node 61 62 Output Parameters: 63 + localtypes - types describing part of local leaf buffer referencing each remote rank 64 - remotetypes - types describing part of remote root buffer referenced for each remote rank 65 66 Level: developer 67 68 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()` 69 @*/ 70 static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes) 71 { 72 PetscSF_Window *w = (PetscSF_Window *)sf->data; 73 PetscSFDataLink link; 74 PetscMPIInt nranks; 75 const PetscInt *roffset; 76 77 PetscFunctionBegin; 78 /* Look for types in cache */ 79 for (link = w->link; link; link = link->next) { 80 PetscBool match; 81 82 PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match)); 83 if (match) { 84 *localtypes = link->mine; 85 *remotetypes = link->remote; 86 PetscFunctionReturn(PETSC_SUCCESS); 87 } 88 } 89 90 /* Create new composite types for each send rank */ 91 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, &roffset, NULL, NULL)); 92 PetscCall(PetscNew(&link)); 93 PetscCallMPI(MPI_Type_dup(unit, &link->unit)); 94 PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote)); 95 for (PetscMPIInt i = 0; i < nranks; i++) { 96 PetscMPIInt rcount; 97 PetscMPIInt *rmine, *rremote; 98 99 PetscCall(PetscMPIIntCast(roffset[i + 1] - roffset[i], &rcount)); 100 #if !defined(PETSC_USE_64BIT_INDICES) 101 rmine = sf->rmine + sf->roffset[i]; 102 rremote = sf->rremote + sf->roffset[i]; 103 #else 104 PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote)); 105 for (PetscInt j = 0; j < rcount; j++) { 106 PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], &rmine[j])); 107 PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], &rremote[j])); 108 } 109 #endif 110 111 PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i])); 112 PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i])); 113 #if defined(PETSC_USE_64BIT_INDICES) 114 PetscCall(PetscFree2(rmine, rremote)); 115 #endif 116 PetscCallMPI(MPI_Type_commit(&link->mine[i])); 117 PetscCallMPI(MPI_Type_commit(&link->remote[i])); 118 } 119 link->next = w->link; 120 w->link = link; 121 122 *localtypes = link->mine; 123 *remotetypes = link->remote; 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 /*@ 128 PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation 129 130 Logically Collective 131 132 Input Parameters: 133 + sf - star forest for communication of type `PETSCSFWINDOW` 134 - flavor - flavor type 135 136 Options Database Key: 137 . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see `PetscSFWindowFlavorType`) 138 139 Level: advanced 140 141 Notes: 142 Windows reuse follows these rules\: 143 .vb 144 PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create 145 146 PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g. 147 PetscSFRegisterPersistent(sf,rootdata1,leafdata); 148 for i=1 to K 149 PetscSFOperationBegin(sf,rootdata1,leafdata); 150 PetscSFOperationEnd(sf,rootdata1,leafdata); 151 ... 152 PetscSFOperationBegin(sf,rootdata1,leafdata); 153 PetscSFOperationEnd(sf,rootdata1,leafdata); 154 endfor 155 PetscSFDeregisterPersistent(sf,rootdata1,leafdata); 156 157 The following pattern will instead raise an error 158 PetscSFOperationBegin(sf,rootdata1,leafdata); 159 PetscSFOperationEnd(sf,rootdata1,leafdata); 160 PetscSFOperationBegin(sf,rank ? rootdata1 : rootdata2,leafdata); 161 PetscSFOperationEnd(sf,rank ? rootdata1 : rootdata2,leafdata); 162 163 PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use 164 165 PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE 166 .ve 167 168 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()` 169 @*/ 170 PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor) 171 { 172 PetscFunctionBegin; 173 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 174 PetscValidLogicalCollectiveEnum(sf, flavor, 2); 175 PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor)); 176 PetscFunctionReturn(PETSC_SUCCESS); 177 } 178 179 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor) 180 { 181 PetscSF_Window *w = (PetscSF_Window *)sf->data; 182 183 PetscFunctionBegin; 184 w->flavor = flavor; 185 PetscFunctionReturn(PETSC_SUCCESS); 186 } 187 188 /*@ 189 PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication 190 191 Logically Collective 192 193 Input Parameter: 194 . sf - star forest for communication of type `PETSCSFWINDOW` 195 196 Output Parameter: 197 . flavor - flavor type 198 199 Level: advanced 200 201 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()` 202 @*/ 203 PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor) 204 { 205 PetscFunctionBegin; 206 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 207 PetscAssertPointer(flavor, 2); 208 PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor)); 209 PetscFunctionReturn(PETSC_SUCCESS); 210 } 211 212 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor) 213 { 214 PetscSF_Window *w = (PetscSF_Window *)sf->data; 215 216 PetscFunctionBegin; 217 *flavor = w->flavor; 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 /*@ 222 PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type `PETSCSFWINDOW` 223 224 Logically Collective 225 226 Input Parameters: 227 + sf - star forest for communication 228 - sync - synchronization type 229 230 Options Database Key: 231 . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`) 232 233 Level: advanced 234 235 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType` 236 @*/ 237 PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync) 238 { 239 PetscFunctionBegin; 240 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 241 PetscValidLogicalCollectiveEnum(sf, sync, 2); 242 PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync)); 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync) 247 { 248 PetscSF_Window *w = (PetscSF_Window *)sf->data; 249 250 PetscFunctionBegin; 251 w->sync = sync; 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 /*@ 256 PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW` 257 258 Logically Collective 259 260 Input Parameter: 261 . sf - star forest for communication 262 263 Output Parameter: 264 . sync - synchronization type 265 266 Level: advanced 267 268 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType` 269 @*/ 270 PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync) 271 { 272 PetscFunctionBegin; 273 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 274 PetscAssertPointer(sync, 2); 275 PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync) 280 { 281 PetscSF_Window *w = (PetscSF_Window *)sf->data; 282 283 PetscFunctionBegin; 284 *sync = w->sync; 285 PetscFunctionReturn(PETSC_SUCCESS); 286 } 287 288 /*@C 289 PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent windows allocation 290 291 Logically Collective 292 293 Input Parameters: 294 + sf - star forest for communication 295 - info - `MPI_Info` handle 296 297 Level: advanced 298 299 Note: 300 The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`. 301 302 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()` 303 @*/ 304 PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info) 305 { 306 PetscFunctionBegin; 307 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 308 PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info)); 309 PetscFunctionReturn(PETSC_SUCCESS); 310 } 311 312 static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info) 313 { 314 PetscSF_Window *w = (PetscSF_Window *)sf->data; 315 316 PetscFunctionBegin; 317 if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info)); 318 if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info)); 319 PetscFunctionReturn(PETSC_SUCCESS); 320 } 321 322 /*@C 323 PetscSFWindowGetInfo - Get the `MPI_Info` handle used for windows allocation 324 325 Logically Collective 326 327 Input Parameter: 328 . sf - star forest for communication 329 330 Output Parameter: 331 . info - `MPI_Info` handle 332 333 Level: advanced 334 335 Note: 336 If `PetscSFWindowSetInfo()` has not be called, this returns `MPI_INFO_NULL` 337 338 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()` 339 @*/ 340 PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info) 341 { 342 PetscFunctionBegin; 343 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 344 PetscAssertPointer(info, 2); 345 PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info)); 346 PetscFunctionReturn(PETSC_SUCCESS); 347 } 348 349 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info) 350 { 351 PetscSF_Window *w = (PetscSF_Window *)sf->data; 352 353 PetscFunctionBegin; 354 *info = w->info; 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 static PetscErrorCode PetscSFWindowCreateDynamicSF(PetscSF sf, PetscSF *dynsf) 359 { 360 PetscSFNode *remotes; 361 362 PetscFunctionBegin; 363 PetscCall(PetscMalloc1(sf->nranks, &remotes)); 364 for (PetscInt i = 0; i < sf->nranks; i++) { 365 remotes[i].rank = sf->ranks[i]; 366 remotes[i].index = 0; 367 } 368 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, dynsf)); 369 PetscCall(PetscSFSetType(*dynsf, PETSCSFBASIC)); /* break recursion */ 370 PetscCall(PetscSFSetGraph(*dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER)); 371 PetscFunctionReturn(PETSC_SUCCESS); 372 } 373 374 static PetscErrorCode PetscSFWindowAttach(PetscSF sf, PetscSFWinLink link, void *rootdata, size_t wsize) 375 { 376 PetscFunctionBegin; 377 #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 378 { 379 PetscSF_Window *w = (PetscSF_Window *)sf->data; 380 MPI_Comm wcomm; 381 MPI_Aint winaddr; 382 void *addr = rootdata; 383 PetscMPIInt nranks; 384 // some Open MPI versions do not support MPI_Win_attach(win,NULL,0); 385 wcomm = w->window_comm; 386 if (addr != NULL) PetscCallMPI(MPI_Win_attach(link->win, addr, wsize)); 387 link->addr = addr; 388 PetscCheck(w->dynsf, wcomm, PETSC_ERR_ORDER, "Must call PetscSFSetUp()"); 389 PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL)); 390 PetscCallMPI(MPI_Get_address(addr, &winaddr)); 391 if (!link->dyn_target_addr) PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr)); 392 PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE)); 393 PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE)); 394 } 395 #else 396 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "dynamic windows not supported"); 397 #endif 398 PetscFunctionReturn(PETSC_SUCCESS); 399 } 400 401 /* 402 PetscSFGetWindow - Get a window for use with a given data type 403 404 Collective 405 406 Input Parameters: 407 + sf - star forest 408 . unit - data type 409 . rootdata - array to be sent 410 . leafdata - only used to help uniquely identify windows 411 . sync - type of synchronization `PetscSFWindowSyncType` 412 . epoch - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window 413 . fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE` 414 . postassert - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE` 415 - startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE` 416 417 Output Parameters: 418 + target_disp - target_disp argument for RMA calls (significative for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only) 419 + reqs - array of requests (significative for sync == `PETSCSF_WINDOW_SYNC_LOCK` only) 420 - win - window 421 422 Level: developer 423 424 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()` 425 */ 426 427 static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *rootdata, void *leafdata, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win) 428 { 429 PetscSF_Window *w = (PetscSF_Window *)sf->data; 430 MPI_Aint bytes; 431 PetscSFWinLink link; 432 PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE; 433 MPI_Aint wsize; 434 MPI_Comm wcomm; 435 PetscBool is_empty; 436 437 PetscFunctionBegin; 438 PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes)); 439 wsize = (MPI_Aint)(bytes * sf->nroots); 440 wcomm = w->window_comm; 441 is_empty = w->is_empty; 442 if (is_empty) { 443 if (target_disp) *target_disp = NULL; 444 if (reqs) *reqs = NULL; 445 *win = MPI_WIN_NULL; 446 PetscFunctionReturn(PETSC_SUCCESS); 447 } 448 if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE; 449 if (PetscDefined(HAVE_MPI_FEATURE_DYNAMIC_WINDOW) && w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) { 450 // first search for a persistent window 451 for (link = w->wins; reuse && link; link = link->next) { 452 PetscBool match; 453 454 if (!link->persistent) continue; 455 match = (link->flavor == w->flavor && link->rootdata == rootdata && link->leafdata == leafdata) ? PETSC_TRUE : PETSC_FALSE; 456 if (PetscDefined(USE_DEBUG)) { 457 PetscInt matches[2]; 458 PetscInt all_matches[2]; 459 460 matches[0] = match ? 1 : 0; 461 matches[1] = match ? -1 : 0; 462 PetscCallMPI(MPIU_Allreduce(matches, all_matches, 2, MPIU_INT, MPI_MAX, wcomm)); 463 all_matches[1] = -all_matches[1]; 464 PetscCheck(all_matches[0] == all_matches[1], wcomm, PETSC_ERR_ARG_INCOMP, 465 "Inconsistent use across MPI processes of persistent leaf and root data registered with PetscSFRegisterPersistent().\n" 466 "Either the persistent data was changed on a subset of processes (which is not allowed),\n" 467 "or persistent data was not deregistered with PetscSFDeregisterPersistent() before being deallocated"); 468 } 469 if (match) { 470 PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication already in progress on persistent root and leaf data"); 471 PetscCheck(!epoch || !link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Communication epoch already open for window"); 472 PetscCheck(bytes == link->bytes, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Wrong data type for persistent root and leaf data"); 473 *win = link->win; 474 goto found; 475 } 476 } 477 } 478 for (link = w->wins; reuse && link; link = link->next) { 479 if (w->flavor != link->flavor) continue; 480 /* an existing window can be used (1) if it is not in use, (2) if we are 481 not asking to start an epoch or it does not have an already started 482 epoch, and (3) if it is the right size */ 483 if (!link->inuse && (!epoch || !link->epoch) && bytes == (MPI_Aint)link->bytes) { 484 if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) { 485 PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize)); 486 } else { 487 update = PETSC_TRUE; 488 } 489 link->rootdata = rootdata; 490 link->leafdata = leafdata; 491 PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm)); 492 *win = link->win; 493 goto found; 494 } 495 } 496 497 PetscCall(PetscNew(&link)); 498 link->bytes = bytes; 499 link->next = w->wins; 500 link->flavor = w->flavor; 501 link->dyn_target_addr = NULL; 502 link->reqs = NULL; 503 w->wins = link; 504 link->rootdata = rootdata; 505 link->leafdata = leafdata; 506 if (sync == PETSCSF_WINDOW_SYNC_LOCK) { 507 PetscCall(PetscMalloc1(sf->nranks, &link->reqs)); 508 for (PetscMPIInt i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL; 509 } 510 switch (w->flavor) { 511 case PETSCSF_WINDOW_FLAVOR_CREATE: 512 PetscCallMPI(MPI_Win_create(rootdata, wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->win)); 513 link->addr = rootdata; 514 break; 515 #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 516 case PETSCSF_WINDOW_FLAVOR_DYNAMIC: 517 PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win)); 518 PetscCall(PetscSFWindowAttach(sf, link, rootdata, wsize)); 519 break; 520 case PETSCSF_WINDOW_FLAVOR_ALLOCATE: 521 PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win)); 522 update = PETSC_TRUE; 523 break; 524 #endif 525 #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) 526 case PETSCSF_WINDOW_FLAVOR_SHARED: 527 PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, wcomm, &link->addr, &link->win)); 528 update = PETSC_TRUE; 529 break; 530 #endif 531 default: 532 SETERRQ(wcomm, PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]); 533 } 534 PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm)); 535 *win = link->win; 536 537 found: 538 539 if (target_disp) *target_disp = link->dyn_target_addr; 540 if (reqs) *reqs = link->reqs; 541 if (update) { /* locks are needed for the "separate" memory model only, the fence guarantees memory-synchronization */ 542 PetscMPIInt rank; 543 544 PetscCallMPI(MPI_Comm_rank(wcomm, &rank)); 545 if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win)); 546 PetscCall(PetscMemcpy(link->addr, rootdata, sf->nroots * bytes)); 547 if (sync == PETSCSF_WINDOW_SYNC_LOCK) { 548 PetscCallMPI(MPI_Win_unlock(rank, *win)); 549 PetscCallMPI(MPI_Win_fence(0, *win)); 550 } 551 } 552 link->inuse = PETSC_TRUE; 553 link->epoch = epoch; 554 if (epoch) { 555 switch (sync) { 556 case PETSCSF_WINDOW_SYNC_FENCE: 557 PetscCallMPI(MPI_Win_fence(fenceassert, *win)); 558 break; 559 case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ 560 break; 561 case PETSCSF_WINDOW_SYNC_ACTIVE: { 562 MPI_Group ingroup, outgroup; 563 PetscMPIInt isize, osize; 564 565 /* Open MPI 4.0.2 with btl=vader does not like calling 566 - MPI_Win_complete when ogroup is empty 567 - MPI_Win_wait when igroup is empty 568 So, we do not even issue the corresponding start and post calls 569 The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that 570 start(outgroup) has a matching post(ingroup) 571 and this is guaranteed by PetscSF 572 */ 573 PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup)); 574 PetscCallMPI(MPI_Group_size(ingroup, &isize)); 575 PetscCallMPI(MPI_Group_size(outgroup, &osize)); 576 if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win)); 577 if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win)); 578 } break; 579 default: 580 SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type"); 581 } 582 } 583 PetscFunctionReturn(PETSC_SUCCESS); 584 } 585 586 /* 587 PetscSFFindWindow - Finds a window that is already in use 588 589 Not Collective 590 591 Input Parameters: 592 + sf - star forest 593 . unit - data type 594 . rootdata - array with which the window is associated 595 - leafdata - only used to help uniquely identify windows 596 597 Output Parameters: 598 + win - window 599 - reqs - outstanding requests associated to the window 600 601 Level: developer 602 603 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()` 604 */ 605 static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata, MPI_Win *win, MPI_Request **reqs) 606 { 607 PetscSF_Window *w = (PetscSF_Window *)sf->data; 608 PetscSFWinLink link; 609 PetscBool is_empty; 610 MPI_Aint bytes; 611 612 PetscFunctionBegin; 613 PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes)); 614 *win = MPI_WIN_NULL; 615 is_empty = w->is_empty; 616 if (is_empty) { 617 *reqs = NULL; 618 *win = MPI_WIN_NULL; 619 PetscFunctionReturn(PETSC_SUCCESS); 620 } 621 for (link = w->wins; link; link = link->next) { 622 if (rootdata == link->rootdata && leafdata == link->leafdata && bytes == link->bytes) { 623 PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)w->window_comm)); 624 *win = link->win; 625 *reqs = link->reqs; 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 } 629 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use"); 630 } 631 632 /* 633 PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()` 634 635 Collective 636 637 Input Parameters: 638 + sf - star forest 639 . unit - data type 640 . array - array associated with window 641 . sync - type of synchronization `PetscSFWindowSyncType` 642 . epoch - close an epoch, must match argument to `PetscSFGetWindow()` 643 . update - if we have to update the local window array 644 - win - window 645 646 Level: developer 647 648 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()` 649 */ 650 static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win) 651 { 652 PetscSF_Window *w = (PetscSF_Window *)sf->data; 653 PetscSFWinLink *p, link; 654 PetscBool reuse = PETSC_FALSE; 655 PetscSFWindowFlavorType flavor; 656 void *laddr; 657 MPI_Aint bytes; 658 MPI_Comm wcomm; 659 660 PetscFunctionBegin; 661 if (*win == MPI_WIN_NULL) PetscFunctionReturn(PETSC_SUCCESS); 662 wcomm = w->window_comm; 663 for (p = &w->wins; *p; p = &(*p)->next) { 664 link = *p; 665 if (*win == link->win) { 666 PetscCheck(array == link->rootdata, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array"); 667 if (epoch != link->epoch) { 668 PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end"); 669 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch"); 670 } 671 laddr = link->addr; 672 flavor = link->flavor; 673 bytes = link->bytes; 674 if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE; 675 else { 676 *p = link->next; 677 update = PETSC_FALSE; 678 } /* remove from list */ 679 goto found; 680 } 681 } 682 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use"); 683 684 found: 685 PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)wcomm)); 686 if (epoch) { 687 switch (sync) { 688 case PETSCSF_WINDOW_SYNC_FENCE: 689 PetscCallMPI(MPI_Win_fence(fenceassert, *win)); 690 break; 691 case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ 692 break; 693 case PETSCSF_WINDOW_SYNC_ACTIVE: { 694 MPI_Group ingroup, outgroup; 695 PetscMPIInt isize, osize; 696 697 /* Open MPI 4.0.2 with btl=wader does not like calling 698 - MPI_Win_complete when ogroup is empty 699 - MPI_Win_wait when igroup is empty 700 The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that 701 - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete 702 - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait 703 */ 704 PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup)); 705 PetscCallMPI(MPI_Group_size(ingroup, &isize)); 706 PetscCallMPI(MPI_Group_size(outgroup, &osize)); 707 if (osize) PetscCallMPI(MPI_Win_complete(*win)); 708 if (isize) PetscCallMPI(MPI_Win_wait(*win)); 709 } break; 710 default: 711 SETERRQ(wcomm, PETSC_ERR_PLIB, "Unknown synchronization type"); 712 } 713 } 714 #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 715 if (link->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC && !link->persistent) { 716 if (link->addr != NULL) PetscCallMPI(MPI_Win_detach(link->win, link->addr)); 717 link->addr = NULL; 718 } 719 #endif 720 if (update) { 721 if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win)); 722 PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes)); 723 } 724 link->epoch = PETSC_FALSE; 725 link->inuse = PETSC_FALSE; 726 if (!link->persistent) { 727 link->rootdata = NULL; 728 link->leafdata = NULL; 729 } 730 if (!reuse) { 731 PetscCall(PetscFree(link->dyn_target_addr)); 732 PetscCall(PetscFree(link->reqs)); 733 PetscCallMPI(MPI_Win_free(&link->win)); 734 PetscCall(PetscFree(link)); 735 *win = MPI_WIN_NULL; 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode PetscSFSetUp_Window(PetscSF sf) 741 { 742 PetscSF_Window *w = (PetscSF_Window *)sf->data; 743 MPI_Group ingroup, outgroup; 744 MPI_Comm comm; 745 746 PetscFunctionBegin; 747 PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY)); 748 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 749 if (w->window_comm == MPI_COMM_NULL) { 750 PetscInt nroots, nleaves, nranks; 751 PetscBool has_empty; 752 PetscMPIInt wcommrank; 753 PetscSF dynsf_full = NULL; 754 755 if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full)); 756 757 PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL)); 758 has_empty = (nroots == 0 && nleaves == 0) ? PETSC_TRUE : PETSC_FALSE; 759 nranks = sf->nranks; 760 PetscCall(PetscMalloc1(nranks, &w->wcommranks)); 761 w->is_empty = has_empty; 762 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &has_empty, 1, MPIU_BOOL, MPI_LOR, comm)); 763 if (has_empty) { 764 PetscMPIInt rank; 765 MPI_Comm raw_comm; 766 PetscSFNode *remotes; 767 768 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 769 PetscCallMPI(MPI_Comm_split(comm, w->is_empty ? 1 : 0, rank, &raw_comm)); 770 PetscCall(PetscCommDuplicate(raw_comm, &w->window_comm, NULL)); 771 PetscCallMPI(MPI_Comm_free(&raw_comm)); 772 773 PetscCallMPI(MPI_Comm_rank(w->window_comm, &wcommrank)); 774 if (!dynsf_full) PetscCall(PetscSFWindowCreateDynamicSF(sf, &dynsf_full)); 775 PetscCall(PetscSFBcastBegin(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE)); 776 PetscCall(PetscSFBcastEnd(dynsf_full, MPI_INT, &wcommrank, w->wcommranks, MPI_REPLACE)); 777 778 if (w->flavor == PETSCSF_WINDOW_FLAVOR_DYNAMIC) { 779 PetscCall(PetscSFCreate(w->window_comm, &w->dynsf)); 780 PetscCall(PetscSFSetType(w->dynsf, PETSCSFBASIC)); /* break recursion */ 781 PetscCall(PetscMalloc1(sf->nranks, &remotes)); 782 for (PetscInt i = 0; i < sf->nranks; i++) { 783 remotes[i].rank = w->wcommranks[i]; 784 remotes[i].index = 0; 785 } 786 PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER)); 787 } 788 } else { 789 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sf), &w->window_comm, NULL)); 790 PetscCall(PetscArraycpy(w->wcommranks, sf->ranks, nranks)); 791 PetscCall(PetscObjectReference((PetscObject)dynsf_full)); 792 w->dynsf = dynsf_full; 793 } 794 if (w->dynsf) PetscCall(PetscSFSetUp(w->dynsf)); 795 PetscCall(PetscSFDestroy(&dynsf_full)); 796 } 797 switch (w->sync) { 798 case PETSCSF_WINDOW_SYNC_ACTIVE: 799 PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup)); 800 default: 801 break; 802 } 803 PetscFunctionReturn(PETSC_SUCCESS); 804 } 805 806 static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems PetscOptionsObject) 807 { 808 PetscSF_Window *w = (PetscSF_Window *)sf->data; 809 PetscSFWindowFlavorType flavor = w->flavor; 810 811 PetscFunctionBegin; 812 PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options"); 813 PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL)); 814 PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL)); 815 PetscCall(PetscSFWindowSetFlavorType(sf, flavor)); 816 PetscOptionsHeadEnd(); 817 PetscFunctionReturn(PETSC_SUCCESS); 818 } 819 820 static PetscErrorCode PetscSFReset_Window(PetscSF sf) 821 { 822 PetscSF_Window *w = (PetscSF_Window *)sf->data; 823 PetscSFDataLink link, next; 824 PetscSFWinLink wlink, wnext; 825 PetscInt i; 826 MPI_Comm wcomm; 827 PetscBool is_empty; 828 829 PetscFunctionBegin; 830 for (link = w->link; link; link = next) { 831 next = link->next; 832 PetscCallMPI(MPI_Type_free(&link->unit)); 833 for (i = 0; i < sf->nranks; i++) { 834 PetscCallMPI(MPI_Type_free(&link->mine[i])); 835 PetscCallMPI(MPI_Type_free(&link->remote[i])); 836 } 837 PetscCall(PetscFree2(link->mine, link->remote)); 838 PetscCall(PetscFree(link)); 839 } 840 w->link = NULL; 841 wcomm = w->window_comm; 842 is_empty = w->is_empty; 843 if (!is_empty) { 844 for (wlink = w->wins; wlink; wlink = wnext) { 845 wnext = wlink->next; 846 PetscCheck(!wlink->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr); 847 PetscCall(PetscFree(wlink->dyn_target_addr)); 848 PetscCall(PetscFree(wlink->reqs)); 849 PetscCallMPI(MPI_Win_free(&wlink->win)); 850 PetscCall(PetscFree(wlink)); 851 } 852 } 853 w->wins = NULL; 854 PetscCall(PetscSFDestroy(&w->dynsf)); 855 if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info)); 856 PetscCall(PetscCommDestroy(&w->window_comm)); 857 PetscCall(PetscFree(w->wcommranks)); 858 PetscFunctionReturn(PETSC_SUCCESS); 859 } 860 861 static PetscErrorCode PetscSFRegisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 862 { 863 PetscSF_Window *w = (PetscSF_Window *)sf->data; 864 MPI_Aint bytes, wsize; 865 PetscBool is_empty; 866 PetscSFWinLink link; 867 868 PetscFunctionBegin; 869 PetscCall(PetscSFSetUp(sf)); 870 if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS); 871 PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes)); 872 wsize = (MPI_Aint)(bytes * sf->nroots); 873 is_empty = w->is_empty; 874 if (is_empty) PetscFunctionReturn(PETSC_SUCCESS); 875 PetscCall(PetscNew(&link)); 876 link->flavor = w->flavor; 877 link->next = w->wins; 878 #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 879 { 880 MPI_Comm wcomm = w->window_comm; 881 PetscCallMPI(MPI_Win_create_dynamic(w->info, wcomm, &link->win)); 882 } 883 #endif 884 PetscCall(PetscSFWindowAttach(sf, link, (void *)rootdata, wsize)); 885 link->rootdata = (void *)rootdata; 886 link->leafdata = (void *)leafdata; 887 link->bytes = bytes; 888 link->epoch = PETSC_FALSE; 889 link->inuse = PETSC_FALSE; 890 link->persistent = PETSC_TRUE; 891 w->wins = link; 892 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 893 PetscInt i; 894 895 PetscCall(PetscMalloc1(sf->nranks, &link->reqs)); 896 for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL; 897 } 898 PetscFunctionReturn(PETSC_SUCCESS); 899 } 900 901 static PetscErrorCode PetscSFDeregisterPersistent_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata) 902 { 903 PetscSF_Window *w = (PetscSF_Window *)sf->data; 904 MPI_Aint bytes; 905 MPI_Comm wcomm; 906 PetscBool is_empty; 907 PetscSFWinLink *p; 908 909 PetscFunctionBegin; 910 PetscCall(PetscSFSetUp(sf)); 911 if (w->flavor != PETSCSF_WINDOW_FLAVOR_DYNAMIC) PetscFunctionReturn(PETSC_SUCCESS); 912 PetscCall(PetscSFGetDatatypeSize_Internal(PetscObjectComm((PetscObject)sf), unit, &bytes)); 913 wcomm = w->window_comm; 914 is_empty = w->is_empty; 915 if (is_empty) PetscFunctionReturn(PETSC_SUCCESS); 916 for (p = &w->wins; *p; p = &(*p)->next) { 917 PetscSFWinLink link = *p; 918 if (link->flavor == w->flavor && link->persistent && link->rootdata == rootdata && link->leafdata == leafdata && link->bytes == bytes) { 919 PetscCheck(!link->inuse, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window when communication is still in progress"); 920 PetscCheck(!link->epoch, wcomm, PETSC_ERR_ARG_WRONGSTATE, "Deregistering a window with an unconcluded epoch"); 921 #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW) 922 PetscCallMPI(MPI_Win_detach(link->win, link->addr)); 923 link->addr = NULL; 924 #endif 925 PetscCall(PetscFree(link->dyn_target_addr)); 926 PetscCall(PetscFree(link->reqs)); 927 PetscCallMPI(MPI_Win_free(&link->win)); 928 *p = link->next; 929 PetscCall(PetscFree(link)); 930 break; 931 } 932 } 933 PetscFunctionReturn(PETSC_SUCCESS); 934 } 935 936 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf) 937 { 938 PetscFunctionBegin; 939 PetscCall(PetscSFReset_Window(sf)); 940 PetscCall(PetscFree(sf->data)); 941 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL)); 942 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL)); 943 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL)); 944 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL)); 945 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL)); 946 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL)); 947 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", NULL)); 948 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", NULL)); 949 PetscFunctionReturn(PETSC_SUCCESS); 950 } 951 952 static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer) 953 { 954 PetscSF_Window *w = (PetscSF_Window *)sf->data; 955 PetscBool iascii; 956 PetscViewerFormat format; 957 958 PetscFunctionBegin; 959 PetscCall(PetscViewerGetFormat(viewer, &format)); 960 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 961 if (iascii) { 962 PetscCall(PetscViewerASCIIPrintf(viewer, " current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered")); 963 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 964 if (w->info != MPI_INFO_NULL) { 965 PetscMPIInt k, nkeys; 966 char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL]; 967 968 PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys)); 969 PetscCall(PetscViewerASCIIPrintf(viewer, " current info with %d keys. Ordered key-value pairs follow:\n", nkeys)); 970 for (k = 0; k < nkeys; k++) { 971 PetscMPIInt flag; 972 973 PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key)); 974 PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag)); 975 PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key); 976 PetscCall(PetscViewerASCIIPrintf(viewer, " %s = %s\n", key, value)); 977 } 978 } else { 979 PetscCall(PetscViewerASCIIPrintf(viewer, " current info=MPI_INFO_NULL\n")); 980 } 981 } 982 } 983 PetscFunctionReturn(PETSC_SUCCESS); 984 } 985 986 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf) 987 { 988 PetscSF_Window *w = (PetscSF_Window *)sf->data; 989 PetscSFWindowSyncType synctype; 990 991 PetscFunctionBegin; 992 synctype = w->sync; 993 /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */ 994 if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK; 995 PetscCall(PetscSFWindowSetSyncType(newsf, synctype)); 996 PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor)); 997 PetscCall(PetscSFWindowSetInfo(newsf, w->info)); 998 PetscFunctionReturn(PETSC_SUCCESS); 999 } 1000 1001 static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 1002 { 1003 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1004 PetscMPIInt nranks; 1005 const PetscMPIInt *ranks; 1006 const MPI_Aint *target_disp; 1007 const MPI_Datatype *mine, *remote; 1008 MPI_Request *reqs; 1009 MPI_Win win; 1010 1011 PetscFunctionBegin; 1012 PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented"); 1013 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1014 PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote)); 1015 PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win)); 1016 ranks = w->wcommranks; 1017 for (PetscMPIInt i = 0; i < nranks; i++) { 1018 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 1019 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 1020 PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win)); 1021 #if defined(PETSC_HAVE_MPI_RGET) 1022 PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i])); 1023 #else 1024 PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 1025 #endif 1026 } else { 1027 CHKMEMQ; 1028 PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 1029 CHKMEMQ; 1030 } 1031 } 1032 PetscFunctionReturn(PETSC_SUCCESS); 1033 } 1034 1035 static PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 1036 { 1037 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1038 MPI_Win win; 1039 MPI_Request *reqs = NULL; 1040 1041 PetscFunctionBegin; 1042 PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs)); 1043 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 1044 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 1045 PetscMPIInt nranks; 1046 const PetscMPIInt *ranks; 1047 1048 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1049 ranks = w->wcommranks; 1050 for (PetscMPIInt i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1051 } 1052 PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 static PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 1057 { 1058 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1059 PetscMPIInt nranks; 1060 const PetscMPIInt *ranks; 1061 const MPI_Aint *target_disp; 1062 const MPI_Datatype *mine, *remote; 1063 MPI_Win win; 1064 1065 PetscFunctionBegin; 1066 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1067 PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote)); 1068 PetscCall(PetscSFWindowOpTranslate(&op)); 1069 PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win)); 1070 ranks = w->wcommranks; 1071 for (PetscMPIInt i = 0; i < nranks; i++) { 1072 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 1073 1074 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win)); 1075 PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 1076 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1077 } 1078 PetscFunctionReturn(PETSC_SUCCESS); 1079 } 1080 1081 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 1082 { 1083 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1084 MPI_Win win; 1085 MPI_Request *reqs = NULL; 1086 1087 PetscFunctionBegin; 1088 PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs)); 1089 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 1090 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win)); 1091 PetscFunctionReturn(PETSC_SUCCESS); 1092 } 1093 1094 static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) 1095 { 1096 PetscMPIInt nranks; 1097 const PetscMPIInt *ranks; 1098 const MPI_Datatype *mine, *remote; 1099 const MPI_Aint *target_disp; 1100 MPI_Win win; 1101 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1102 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1103 PetscSFWindowFlavorType oldf; 1104 #endif 1105 1106 PetscFunctionBegin; 1107 PetscCall(PetscSFGetRootRanks(sf, &nranks, NULL, NULL, NULL, NULL)); 1108 PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote)); 1109 PetscCall(PetscSFWindowOpTranslate(&op)); 1110 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1111 /* FetchAndOp without MPI_Get_Accumulate requires locking. 1112 we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */ 1113 oldf = w->flavor; 1114 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 1115 PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win)); 1116 #else 1117 PetscCall(PetscSFGetWindow(sf, unit, rootdata, (void *)leafdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win)); 1118 #endif 1119 ranks = w->wcommranks; 1120 for (PetscMPIInt i = 0; i < nranks; i++) { 1121 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 1122 1123 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1124 PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win)); 1125 PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 1126 PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 1127 PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1128 #else 1129 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win)); 1130 PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 1131 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 1132 #endif 1133 } 1134 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1135 w->flavor = oldf; 1136 #endif 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 1141 { 1142 MPI_Win win; 1143 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1144 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1145 #endif 1146 MPI_Request *reqs = NULL; 1147 1148 PetscFunctionBegin; 1149 PetscCall(PetscSFFindWindow(sf, unit, rootdata, leafdata, &win, &reqs)); 1150 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 1151 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1152 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win)); 1153 #else 1154 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win)); 1155 #endif 1156 PetscFunctionReturn(PETSC_SUCCESS); 1157 } 1158 1159 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf) 1160 { 1161 PetscSF_Window *w = (PetscSF_Window *)sf->data; 1162 1163 PetscFunctionBegin; 1164 sf->ops->SetUp = PetscSFSetUp_Window; 1165 sf->ops->SetFromOptions = PetscSFSetFromOptions_Window; 1166 sf->ops->Reset = PetscSFReset_Window; 1167 sf->ops->Destroy = PetscSFDestroy_Window; 1168 sf->ops->View = PetscSFView_Window; 1169 sf->ops->Duplicate = PetscSFDuplicate_Window; 1170 sf->ops->BcastBegin = PetscSFBcastBegin_Window; 1171 sf->ops->BcastEnd = PetscSFBcastEnd_Window; 1172 sf->ops->ReduceBegin = PetscSFReduceBegin_Window; 1173 sf->ops->ReduceEnd = PetscSFReduceEnd_Window; 1174 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window; 1175 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window; 1176 1177 PetscCall(PetscNew(&w)); 1178 sf->data = (void *)w; 1179 w->sync = PETSCSF_WINDOW_SYNC_FENCE; 1180 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 1181 w->info = MPI_INFO_NULL; 1182 w->window_comm = MPI_COMM_NULL; 1183 1184 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window)); 1185 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window)); 1186 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window)); 1188 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window)); 1189 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFRegisterPersistent_C", PetscSFRegisterPersistent_Window)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFDeregisterPersistent_C", PetscSFDeregisterPersistent_Window)); 1192 1193 #if defined(PETSC_HAVE_OPENMPI) 1194 #if PETSC_PKG_OPENMPI_VERSION_LE(1, 6, 0) 1195 { 1196 PetscBool ackbug = PETSC_FALSE; 1197 PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL)); 1198 if (ackbug) { 1199 PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n")); 1200 } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed"); 1201 } 1202 #endif 1203 #endif 1204 PetscFunctionReturn(PETSC_SUCCESS); 1205 } 1206