#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/

typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

/* Implementation data for a PETSCSFWINDOW star forest: synchronization and flavor
   settings plus caches of derived MPI datatypes and of MPI windows */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* auxiliary SF used to broadcast per-rank window addresses for the DYNAMIC flavor */
  MPI_Info                info;   /* MPI_Info passed to window creation; MPI_INFO_NULL unless set via PetscSFWindowSetInfo() */
} PetscSF_Window;

/* Cache node: composite MPI datatypes (one local/remote pair per root rank) derived from a unit type */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* private dup of the caller's unit type; used as the cache key */
  MPI_Datatype   *mine;   /* per-rank indexed types into the local leaf buffer */
  MPI_Datatype   *remote; /* per-rank indexed types into the remote root buffer */
  PetscSFDataLink next;
};

/* Cache node: one MPI window plus the bookkeeping needed to reuse and restore it */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window currently checked out by PetscSFGetWindow() */
  size_t                  bytes;           /* extent in bytes of one unit (used for size-based reuse) */
  void                   *addr;            /* memory exposed through the window (may be MPI-allocated) */
  void                   *paddr;           /* user array currently associated with the window */
  MPI_Win                 win;
  MPI_Request            *reqs;            /* per-rank requests; allocated only for PETSCSF_WINDOW_SYNC_LOCK */
  PetscSFWindowFlavorType flavor;          /* flavor the window was created with */
  MPI_Aint               *dyn_target_addr; /* per-rank target displacements; DYNAMIC flavor only */
  PetscBool               epoch;           /* an access/exposure epoch is currently open on this window */
  PetscSFWinLink          next;
};

const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};

/* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
{
  PetscFunctionBegin;
  /* Map PETSc's reduction ops onto the predefined MPI ops required by one-sided MPI_Accumulate */
  if (*op == MPIU_SUM) *op = MPI_SUM;
  else if (*op == MPIU_MAX) *op = MPI_MAX;
  else if (*op == MPIU_MIN) *op = MPI_MIN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

  Not Collective

  Input Parameters:
+ sf   - star forest of type `PETSCSFWINDOW`
- unit - data type for each node

  Output Parameters:
+ localtypes  - types describing part of local leaf buffer referencing each remote rank
- remotetypes - types describing part of
remote root buffer referenced for each remote rank

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink    link;
  PetscInt           i, nranks;
  const PetscInt    *roffset, *rmine, *rremote; /* NOTE(review): rmine/rremote are shadowed inside the loop below; only roffset is read from here */
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link = w->link; link; link = link->next) {
    PetscBool match;
    PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Create new composite types for each send rank */
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  PetscCall(PetscNew(&link));
  PetscCallMPI(MPI_Type_dup(unit, &link->unit));
  PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
  for (i = 0; i < nranks; i++) {
    PetscInt     rcount = roffset[i + 1] - roffset[i];
    PetscMPIInt *rmine, *rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt == int here, so the index arrays can be used in place */
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* MPI wants int displacements; copy-convert each 64-bit index with overflow checking */
    PetscInt j;
    PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
    for (j = 0; j < rcount; j++) {
      PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
      PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
    }
#endif
    /* One indexed block type per rank: blocklength 1 in units of the (duplicated) unit type */
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
#if defined(PETSC_USE_64BIT_INDICES)
    PetscCall(PetscFree2(rmine, rremote));
#endif
    PetscCallMPI(MPI_Type_commit(&link->mine[i]));
    PetscCallMPI(MPI_Type_commit(&link->remote[i]));
  }
  /* Push onto the head of the cache list */
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation

  Logically Collective

  Input Parameters:
+ sf     - star forest for communication of type `PETSCSFWINDOW`
- flavor - flavor type

  Options Database Key:
. -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see `PetscSFWindowFlavorType`)

  Level: advanced

  Notes:
  Windows reuse follows these rules:
.vb
     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       for i=1 to K
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         ...
         PetscSFOperationBegin(rootdataN,leafdata_whatever);
         PetscSFOperationEnd(rootdataN,leafdata_whatever);
       endfor
     The following pattern will instead raise an error
       PetscSFOperationBegin(rootdata1,leafdata_whatever);
       PetscSFOperationEnd(rootdata1,leafdata_whatever);
       PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
       PetscSFOperationEnd(rank ?
rootdata1 : rootdata2,leafdata_whatever);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
.ve

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, flavor, 2);
  /* Dispatch to the type-specific implementation; a no-op for non-window SFs */
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PETSCSFWINDOW implementation of PetscSFWindowSetFlavorType(): record the flavor for subsequent window creation */
static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->flavor = flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication

  Logically Collective

  Input Parameter:
. sf - star forest for communication of type `PETSCSFWINDOW`

  Output Parameter:
.
flavor - flavor type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(flavor, 2);
  /* PetscUseMethod errors if sf is not of a type composing this method */
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PETSCSFWINDOW implementation of PetscSFWindowGetFlavorType(): return the stored flavor */
static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *flavor = w->flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- sync - synchronization type

  Options Database Key:
.
-sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, sync, 2);
  /* Dispatch to the type-specific implementation; a no-op for non-window SFs */
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PETSCSFWINDOW implementation of PetscSFWindowSetSyncType(): record the synchronization type */
static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->sync = sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
.
sync - synchronization type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(sync, 2);
  /* PetscUseMethod errors if sf is not of a type composing this method */
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PETSCSFWINDOW implementation of PetscSFWindowGetSyncType(): return the stored synchronization type */
static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *sync = w->sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent windows allocation

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- info - `MPI_Info` handle

  Level: advanced

  Note:
  The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.
.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Dispatch to the type-specific implementation; a no-op for non-window SFs */
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PETSCSFWINDOW implementation of PetscSFWindowSetInfo(): take a private dup of info, releasing any previous one */
static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetInfo - Get the `MPI_Info` handle used for windows allocation

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. info - `MPI_Info` handle

  Level: advanced

  Note:
  If `PetscSFWindowSetInfo()` has not be called, this returns `MPI_INFO_NULL`

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(info, 2);
  /* PetscUseMethod errors if sf is not of a type composing this method */
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PETSCSFWINDOW implementation of PetscSFWindowGetInfo(): return the stored handle (not a dup) */
static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *info = w->info;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFGetWindow - Get a window for use with a given data type

  Collective

  Input Parameters:
+ sf    - star forest
. unit  - data type
. array - array to be sent
.
sync - type of synchronization `PetscSFWindowSyncType`
. epoch       - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
. fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
. postassert  - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
- startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`

  Output Parameters:
+ target_disp - target_disp argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
. reqs        - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
- win         - window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        lb, lb_true, bytes, bytes_true;
  PetscSFWinLink  link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2]; /* doubles as a debug-consistency scratch and as a non-NULL attach buffer below */
  MPI_Aint  wsize;

  PetscFunctionBegin;
  /* Only simple unit types are supported: zero lower bound and unmodified extent */
  PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
  PetscCheck(lb == 0 && lb_true == 0, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  PetscCheck(bytes == bytes_true, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* CREATE makes a fresh window every time; all other flavors try to reuse a cached one first */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link = w->wins; reuse && link; link = link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* LAND vs LOR of the local match flag differ iff some rank matched and some did not */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
          PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
          PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        PetscCheck(!link->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window in use");
        PetscCheck(!epoch || !link->epoch, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window epoch not finished");
        winok       = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* This rank did not match: participate in the same two collectives with PETSC_FALSE */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
        PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update      = PETSC_TRUE; /* MPI owns the window memory, so the user array must be copied in */
        link->paddr = array;
        winok       = PETSC_TRUE;
      }
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
      goto found;
    }
  }

  /* No reusable window: create a new one of the requested flavor */
  wsize = (MPI_Aint)bytes * sf->nroots;
  PetscCall(PetscNew(&link));
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    PetscCallMPI(MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win));
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    PetscCallMPI(MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win));
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    PetscCallMPI(MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize));
#else
    PetscCallMPI(MPI_Win_attach(link->win, array, wsize));
#endif
    link->addr  = array;
    link->paddr = array;
    PetscCheck(w->dynsf, PetscObjectComm((PetscObject)sf), PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
    PetscCall(PetscSFSetUp(w->dynsf));
    /* With dynamic windows each rank has its own base address; broadcast it so leaves know the target displacement */
    PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
    PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
    PetscCallMPI(MPI_Get_address(array, &winaddr));
    PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE; /* window memory is MPI-allocated; user data must be copied in below */
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE; /* window memory is MPI-allocated; user data must be copied in below */
    link->paddr = array;
    break;
#endif
  default:
    SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guarantees memory-synchronization */
    PetscMPIInt rank;

    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
    PetscCall(PetscMemcpy(link->addr, array, sf->nroots * bytes));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_unlock(rank, *win));
      PetscCallMPI(MPI_Win_fence(0, *win));
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
      if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  PetscSFFindWindow - Finds a window that is already in use

  Not Collective

  Input Parameters:
+ sf - star forest
.
unit - data type
- array - array with which the window is associated

  Output Parameters:
+ win  - window
- reqs - outstanding requests associated to the window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
*/
static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink  link;

  PetscFunctionBegin;
  *win = MPI_WIN_NULL;
  /* Match on the user array pointer recorded when the window was checked out by PetscSFGetWindow() */
  for (link = w->wins; link; link = link->next) {
    if (array == link->paddr) {
      PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
      *win  = link->win;
      *reqs = link->reqs;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
}

/*
  PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`

  Collective

  Input Parameters:
+ sf    - star forest
. unit  - data type
. array - array associated with window
. sync  - type of synchronization `PetscSFWindowSyncType`
. epoch - close an epoch, must match argument to `PetscSFGetWindow()`
.
fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
. update - if we have to update the local window array
- win   - window

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
{
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Walk the cache with a pointer-to-pointer so a CREATE window can be unlinked in place */
  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      PetscCheck(array == link->paddr, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
      if (epoch != link->epoch) {
        PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      }
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      /* CREATE windows are never reused: unlink now and skip the copy-back (data lives in the user array already) */
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (osize) PetscCallMPI(MPI_Win_complete(*win));
      if (isize) PetscCallMPI(MPI_Win_wait(*win));
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  if (update) {
    /* Copy MPI-owned window memory back into the user's root array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
    PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    PetscCall(PetscFree(link->dyn_target_addr));
    PetscCall(PetscFree(link->reqs));
    PetscCallMPI(MPI_Win_free(&link->win));
    PetscCall(PetscFree(link));
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up rank data and the auxiliary address-broadcast SF; eagerly create the in/out groups for ACTIVE sync */
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
  if (!w->dynsf) {
    PetscInt     i;
    PetscSFNode *remotes;

    /* One leaf per root rank, all pointing at remote index 0: used to broadcast window base addresses */
    PetscCall(PetscMalloc1(sf->nranks, &remotes));
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf));
    PetscCall(PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
    PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* Prime the group cache here (collective) so epoch begin/end need not be; fallthrough intended */
    PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process -sf_window_sync and -sf_window_flavor options */
static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
{
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
  PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
  PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release all cached datatypes and windows; errors if any window is still checked out */
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  PetscFunctionBegin;
  for (link = w->link; link; link = next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i = 0; i < sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine, link->remote));
    PetscCall(PetscFree(link));
  }
  w->link = NULL;
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;
    PetscCheck(!wlink->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
    PetscCall(PetscFree(wlink->dyn_target_addr));
    PetscCall(PetscFree(wlink->reqs));
    PetscCallMPI(MPI_Win_free(&wlink->win));
    PetscCall(PetscFree(wlink));
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the window SF: reset caches, free implementation data, and clear composed methods */
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Window(sf));
  PetscCall(PetscFree(sf->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ASCII viewer: print flavor/sync settings; with INFO_DETAIL also dump the MPI_Info key-value pairs */
static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, " current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
        PetscCall(PetscViewerASCIIPrintf(viewer, " current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
          PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
          PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
          PetscCall(PetscViewerASCIIPrintf(viewer, " %s = %s\n", key, value));
        }
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, " current info=MPI_INFO_NULL\n"));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Propagate sync/flavor/info settings to a duplicated SF */
static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
{
  PetscSF_Window       *w = (PetscSF_Window *)sf->data;
  PetscSFWindowSyncType synctype;

  PetscFunctionBegin;
  synctype = w->sync;
  /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
  if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
  PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
  PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
  PetscCall(PetscSFWindowSetInfo(newsf, w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin a root-to-leaf broadcast: expose rootdata through a window and issue one MPI_Get (or MPI_Rget) per root rank */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  PetscCall(PetscSFGetWindow(sf, unit, (void
*)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win)); 827 for (i = 0; i < nranks; i++) { 828 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 829 830 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 831 PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win)); 832 #if defined(PETSC_HAVE_MPI_RGET) 833 PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i])); 834 #else 835 PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 836 #endif 837 } else { 838 PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 839 } 840 } 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 845 { 846 PetscSF_Window *w = (PetscSF_Window *)sf->data; 847 MPI_Win win; 848 MPI_Request *reqs = NULL; 849 850 PetscFunctionBegin; 851 PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs)); 852 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 853 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 854 PetscInt i, nranks; 855 const PetscMPIInt *ranks; 856 857 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 858 for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 859 } 860 PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win)); 861 PetscFunctionReturn(PETSC_SUCCESS); 862 } 863 864 PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 865 { 866 PetscSF_Window *w = (PetscSF_Window *)sf->data; 867 PetscInt i, nranks; 868 const PetscMPIInt *ranks; 869 const MPI_Aint *target_disp; 870 const MPI_Datatype *mine, *remote; 871 MPI_Win win; 872 873 PetscFunctionBegin; 874 
PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 875 PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote)); 876 PetscCall(PetscSFWindowOpTranslate(&op)); 877 PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win)); 878 for (i = 0; i < nranks; i++) { 879 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 880 881 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win)); 882 PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 883 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 884 } 885 PetscFunctionReturn(PETSC_SUCCESS); 886 } 887 888 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 889 { 890 PetscSF_Window *w = (PetscSF_Window *)sf->data; 891 MPI_Win win; 892 MPI_Request *reqs = NULL; 893 894 PetscFunctionBegin; 895 PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs)); 896 if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 897 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win)); 898 PetscFunctionReturn(PETSC_SUCCESS); 899 } 900 901 static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) 902 { 903 PetscInt i, nranks; 904 const PetscMPIInt *ranks; 905 const MPI_Datatype *mine, *remote; 906 const MPI_Aint *target_disp; 907 MPI_Win win; 908 PetscSF_Window *w = (PetscSF_Window *)sf->data; 909 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 910 PetscSFWindowFlavorType oldf; 911 #endif 912 913 PetscFunctionBegin; 914 PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 915 PetscCall(PetscSFWindowGetDataTypes(sf, unit, 
&mine, &remote)); 916 PetscCall(PetscSFWindowOpTranslate(&op)); 917 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 918 /* FetchAndOp without MPI_Get_Accumulate requires locking. 919 we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */ 920 oldf = w->flavor; 921 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 922 PetscCall(PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win)); 923 #else 924 PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win)); 925 #endif 926 for (i = 0; i < nranks; i++) { 927 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 928 929 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 930 PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win)); 931 PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 932 PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 933 PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 934 #else 935 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win)); 936 PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 937 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 938 #endif 939 } 940 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 941 w->flavor = oldf; 942 #endif 943 PetscFunctionReturn(PETSC_SUCCESS); 944 } 945 946 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 947 { 948 MPI_Win win; 949 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 950 PetscSF_Window *w = (PetscSF_Window *)sf->data; 951 #endif 952 MPI_Request *reqs = NULL; 953 954 PetscFunctionBegin; 955 PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs)); 956 if (reqs) 
PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 957 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 958 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win)); 959 #else 960 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win)); 961 #endif 962 PetscFunctionReturn(PETSC_SUCCESS); 963 } 964 965 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf) 966 { 967 PetscSF_Window *w = (PetscSF_Window *)sf->data; 968 969 PetscFunctionBegin; 970 sf->ops->SetUp = PetscSFSetUp_Window; 971 sf->ops->SetFromOptions = PetscSFSetFromOptions_Window; 972 sf->ops->Reset = PetscSFReset_Window; 973 sf->ops->Destroy = PetscSFDestroy_Window; 974 sf->ops->View = PetscSFView_Window; 975 sf->ops->Duplicate = PetscSFDuplicate_Window; 976 sf->ops->BcastBegin = PetscSFBcastBegin_Window; 977 sf->ops->BcastEnd = PetscSFBcastEnd_Window; 978 sf->ops->ReduceBegin = PetscSFReduceBegin_Window; 979 sf->ops->ReduceEnd = PetscSFReduceEnd_Window; 980 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window; 981 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window; 982 983 PetscCall(PetscNew(&w)); 984 sf->data = (void *)w; 985 w->sync = PETSCSF_WINDOW_SYNC_FENCE; 986 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 987 w->info = MPI_INFO_NULL; 988 989 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window)); 990 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window)); 991 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window)); 992 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window)); 993 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window)); 994 
PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window)); 995 996 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6)) 997 { 998 PetscBool ackbug = PETSC_FALSE; 999 PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL)); 1000 if (ackbug) { 1001 PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n")); 1002 } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed"); 1003 } 1004 #endif 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007