#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/

typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

/* Implementation data for a PETSCSFWINDOW star forest: caches derived MPI
   datatypes per unit type and a list of MPI windows keyed by root array. */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* Companion SF used to broadcast target displacements for the DYNAMIC flavor */
  MPI_Info                info;   /* Info hints passed to window creation; MPI_INFO_NULL unless set */
} PetscSF_Window;

/* Cache node: composite datatypes (one per root rank) built for a given unit type */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* duplicated unit type this entry was built for */
  MPI_Datatype   *mine;   /* leaf-side indexed types, one per root rank */
  MPI_Datatype   *remote; /* root-side indexed types, one per root rank */
  PetscSFDataLink next;
};

/* One MPI window plus the bookkeeping needed to reuse or retire it */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window currently checked out by an operation */
  size_t                  bytes;           /* extent of the unit type the window was sized for */
  void                   *addr;            /* local window memory (may be MPI-allocated) */
  void                   *paddr;           /* user array currently associated with the window */
  MPI_Win                 win;
  MPI_Request            *reqs;            /* per-rank requests, used with SYNC_LOCK only */
  PetscSFWindowFlavorType flavor;
  MPI_Aint               *dyn_target_addr; /* per-rank target displacements, DYNAMIC flavor only */
  PetscBool               epoch;           /* an access/exposure epoch is open on this window */
  PetscSFWinLink          next;
};

const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};

/* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
{
  PetscFunctionBegin;
  if (*op == MPIU_SUM) *op = MPI_SUM;
  else if (*op == MPIU_MAX) *op = MPI_MAX;
  else if (*op == MPIU_MIN) *op = MPI_MIN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

  Not Collective

  Input Parameters:
+ sf   - star forest of type `PETSCSFWINDOW`
- unit - data type for each node

  Output Parameters:
+ localtypes  - types describing part of local leaf buffer referencing each remote rank
- remotetypes - types describing part of remote root buffer referenced for each remote rank

  Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink    link;
  PetscInt           i, nranks;
  const PetscInt    *roffset, *rmine, *rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link = w->link; link; link = link->next) {
    PetscBool match;
    PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Create new composite types for each send rank */
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  PetscCall(PetscNew(&link));
  PetscCallMPI(MPI_Type_dup(unit, &link->unit));
  PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
  for (i = 0; i < nranks; i++) {
    PetscInt     rcount = roffset[i + 1] - roffset[i];
    PetscMPIInt *rmine, *rremote; /* NOTE: shadows the outer const PetscInt* pointers above */
#if !defined(PETSC_USE_64BIT_INDICES)
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* MPI_Type_create_indexed_block() takes int displacements, so narrow 64-bit indices into temporaries */
    PetscInt j;
    PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
    for (j = 0; j < rcount; j++) {
      PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
      PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
    }
#endif

    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
#if defined(PETSC_USE_64BIT_INDICES)
    PetscCall(PetscFree2(rmine, rremote));
#endif
    PetscCallMPI(MPI_Type_commit(&link->mine[i]));
    PetscCallMPI(MPI_Type_commit(&link->remote[i]));
  }
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation

  Logically Collective

  Input Parameters:
+ sf     - star forest for communication of type `PETSCSFWINDOW`
- flavor - flavor type

  Options Database Key:
. -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see `PetscSFWindowFlavorType`)

  Level: advanced

  Notes:

  Windows reuse follows these rules:
.vb
     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       for i=1 to K
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         ...
         PetscSFOperationBegin(rootdataN,leafdata_whatever);
         PetscSFOperationEnd(rootdataN,leafdata_whatever);
       endfor
     The following pattern will instead raise an error
       PetscSFOperationBegin(rootdata1,leafdata_whatever);
       PetscSFOperationEnd(rootdata1,leafdata_whatever);
       PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
       PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
.ve

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, flavor, 2);
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowSetFlavorType() */
static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->flavor = flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication

  Logically Collective

  Input Parameter:
. sf - star forest for communication of type `PETSCSFWINDOW`

  Output Parameter:
. flavor - flavor type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(flavor, 2);
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowGetFlavorType() */
static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *flavor = w->flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- sync - synchronization type

  Options Database Key:
. -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, sync, 2);
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowSetSyncType() */
static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->sync = sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. sync - synchronization type

  Level: advanced

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(sync, 2);
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowGetSyncType() */
static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *sync = w->sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent windows allocation

  Logically Collective

  Input Parameters:
+ sf   - star forest for communication
- info - `MPI_Info` handle

  Level: advanced

  Note:
  The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowSetInfo(); frees any previously stored info handle */
static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFWindowGetInfo - Get the `MPI_Info` handle used for windows allocation

  Logically Collective

  Input Parameter:
. sf - star forest for communication

  Output Parameter:
. info - `MPI_Info` handle

  Level: advanced

  Note:
  If `PetscSFWindowSetInfo()` has not be called, this returns `MPI_INFO_NULL`

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(info, 2);
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation behind PetscSFWindowGetInfo() */
static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *info = w->info;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PetscSFGetWindow - Get a window for use with a given data type

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array to be sent
.  sync - type of synchronization `PetscSFWindowSyncType`
.  epoch - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
.  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
.  postassert - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
-  startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`

   Output Parameters:
+  target_disp - target_disp argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
.  reqs - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
-  win - window

   Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        lb, lb_true, bytes, bytes_true;
  PetscSFWinLink  link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2];
  MPI_Aint  wsize;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
  PetscCheck(lb == 0 && lb_true == 0, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  PetscCheck(bytes == bytes_true, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  /* Try to reuse a cached window according to the flavor-specific matching rules */
  for (link = w->wins; reuse && link; link = link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
          PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
          PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        PetscCheck(!link->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window in use");
        PetscCheck(!epoch || !link->epoch, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window epoch not finished");
        winok       = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
        PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update      = PETSC_TRUE;
        link->paddr = array;
        winok       = PETSC_TRUE;
      }
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
      goto found;
    }
  }

  /* No reusable window: create a new one according to the requested flavor */
  wsize = (MPI_Aint)bytes * sf->nroots;
  PetscCall(PetscNew(&link));
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    PetscCallMPI(MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win));
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    PetscCallMPI(MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win));
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    PetscCallMPI(MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize));
#else
    PetscCallMPI(MPI_Win_attach(link->win, array, wsize));
#endif
    link->addr  = array;
    link->paddr = array;
    PetscCheck(w->dynsf, PetscObjectComm((PetscObject)sf), PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
    PetscCall(PetscSFSetUp(w->dynsf));
    PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
    PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
    /* Every process needs the attached base address of each root rank for its RMA target displacements */
    PetscCallMPI(MPI_Get_address(array, &winaddr));
    PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default:
    SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guarantees memory-synchronization */
    PetscMPIInt rank;

    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
    PetscCall(PetscMemcpy(link->addr, array, sf->nroots * bytes));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_unlock(rank, *win));
      PetscCallMPI(MPI_Win_fence(0, *win));
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
      if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PetscSFFindWindow - Finds a window that is already in use

   Not Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  array - array with which the window is associated

   Output Parameters:
+  win - window
-  reqs - outstanding requests associated to the window

   Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
*/
static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink  link;

  PetscFunctionBegin;
  *win = MPI_WIN_NULL;
  for (link = w->wins; link; link = link->next) {
    if (array == link->paddr) {
      PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
      *win  = link->win;
      *reqs = link->reqs;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
}

/*
   PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array associated with window
.  sync - type of synchronization `PetscSFWindowSyncType`
.  epoch - close an epoch, must match argument to `PetscSFGetWindow()`
.  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
.  update - if we have to update the local window array
-  win - window

   Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
{
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      PetscCheck(array == link->paddr, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
      if (epoch != link->epoch) {
        PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      }
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (osize) PetscCallMPI(MPI_Win_complete(*win));
      if (isize) PetscCallMPI(MPI_Win_wait(*win));
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  if (update) {
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
    /* Copy window memory back into the user's root array */
    PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    PetscCall(PetscFree(link->dyn_target_addr));
    PetscCall(PetscFree(link->reqs));
    PetscCallMPI(MPI_Win_free(&link->win));
    PetscCall(PetscFree(link));
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
  if (!w->dynsf) {
    PetscInt     i;
    PetscSFNode *remotes;

    /* Companion SF with one leaf per root rank, used to broadcast dynamic-window target addresses */
    PetscCall(PetscMalloc1(sf->nranks, &remotes));
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf));
    PetscCall(PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
    PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* Build and cache the in/out groups now so later epochs need no collective setup */
    PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
    /* fallthrough */
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
{
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
  PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
  PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the cached composite datatypes */
  for (link = w->link; link; link = next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i = 0; i < sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine, link->remote));
    PetscCall(PetscFree(link));
  }
  w->link = NULL;
  /* Free all cached windows; none may still be checked out */
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;
    PetscCheck(!wlink->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
    PetscCall(PetscFree(wlink->dyn_target_addr));
    PetscCall(PetscFree(wlink->reqs));
    PetscCallMPI(MPI_Win_free(&wlink->win));
    PetscCall(PetscFree(wlink));
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Window(sf));
  PetscCall(PetscFree(sf->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
          PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
          PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
          PetscCall(PetscViewerASCIIPrintf(viewer, "      %s = %s\n", key, value));
        }
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info=MPI_INFO_NULL\n"));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
{
  PetscSF_Window       *w = (PetscSF_Window *)sf->data;
  PetscSFWindowSyncType synctype;

  PetscFunctionBegin;
  synctype = w->sync;
  /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
  if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
  PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
  PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
  PetscCall(PetscSFWindowSetInfo(newsf, w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
  /* One MPI_Get per root rank, pulling that rank's portion of rootdata into leafdata */
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
#if defined(PETSC_HAVE_MPI_RGET)
      PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
#else
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
#endif
    } else {
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt           i, nranks;
    const PetscMPIInt *ranks;

    PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
    for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  PetscCall(PetscSFWindowOpTranslate(&op));
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
  /* One MPI_Accumulate per root rank, combining leafdata into that rank's rootdata with op */
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine, *remote;
  const MPI_Aint     *target_disp;
  MPI_Win             win;
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf;
#endif

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit,
&mine, &remote)); 917 PetscCall(PetscSFWindowOpTranslate(&op)); 918 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 919 /* FetchAndOp without MPI_Get_Accumulate requires locking. 920 we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */ 921 oldf = w->flavor; 922 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 923 PetscCall(PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win)); 924 #else 925 PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win)); 926 #endif 927 for (i = 0; i < nranks; i++) { 928 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 929 930 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 931 PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win)); 932 PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win)); 933 PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 934 PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 935 #else 936 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win)); 937 PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win)); 938 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win)); 939 #endif 940 } 941 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 942 w->flavor = oldf; 943 #endif 944 PetscFunctionReturn(PETSC_SUCCESS); 945 } 946 947 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 948 { 949 MPI_Win win; 950 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 951 PetscSF_Window *w = (PetscSF_Window *)sf->data; 952 #endif 953 MPI_Request *reqs = NULL; 954 955 PetscFunctionBegin; 956 PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs)); 957 if (reqs) 
PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE)); 958 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 959 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win)); 960 #else 961 PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win)); 962 #endif 963 PetscFunctionReturn(PETSC_SUCCESS); 964 } 965 966 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf) 967 { 968 PetscSF_Window *w = (PetscSF_Window *)sf->data; 969 970 PetscFunctionBegin; 971 sf->ops->SetUp = PetscSFSetUp_Window; 972 sf->ops->SetFromOptions = PetscSFSetFromOptions_Window; 973 sf->ops->Reset = PetscSFReset_Window; 974 sf->ops->Destroy = PetscSFDestroy_Window; 975 sf->ops->View = PetscSFView_Window; 976 sf->ops->Duplicate = PetscSFDuplicate_Window; 977 sf->ops->BcastBegin = PetscSFBcastBegin_Window; 978 sf->ops->BcastEnd = PetscSFBcastEnd_Window; 979 sf->ops->ReduceBegin = PetscSFReduceBegin_Window; 980 sf->ops->ReduceEnd = PetscSFReduceEnd_Window; 981 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window; 982 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window; 983 984 PetscCall(PetscNew(&w)); 985 sf->data = (void *)w; 986 w->sync = PETSCSF_WINDOW_SYNC_FENCE; 987 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 988 w->info = MPI_INFO_NULL; 989 990 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window)); 991 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window)); 992 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window)); 993 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window)); 994 PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window)); 995 
PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window)); 996 997 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6)) 998 { 999 PetscBool ackbug = PETSC_FALSE; 1000 PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL)); 1001 if (ackbug) { 1002 PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n")); 1003 } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed"); 1004 } 1005 #endif 1006 PetscFunctionReturn(PETSC_SUCCESS); 1007 } 1008