1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 3 typedef struct _n_PetscSFDataLink *PetscSFDataLink; 4 typedef struct _n_PetscSFWinLink *PetscSFWinLink; 5 6 typedef struct { 7 PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */ 8 PetscSFDataLink link; /* List of MPI data types, lazily constructed for each data type */ 9 PetscSFWinLink wins; /* List of active windows */ 10 PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */ 11 PetscSF dynsf; 12 MPI_Info info; 13 } PetscSF_Window; 14 15 struct _n_PetscSFDataLink { 16 MPI_Datatype unit; 17 MPI_Datatype *mine; 18 MPI_Datatype *remote; 19 PetscSFDataLink next; 20 }; 21 22 struct _n_PetscSFWinLink { 23 PetscBool inuse; 24 size_t bytes; 25 void *addr; 26 void *paddr; 27 MPI_Win win; 28 MPI_Request *reqs; 29 PetscSFWindowFlavorType flavor; 30 MPI_Aint *dyn_target_addr; 31 PetscBool epoch; 32 PetscSFWinLink next; 33 }; 34 35 const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL}; 36 const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL}; 37 38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPIU_Allreduce) */ 39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op) 40 { 41 PetscFunctionBegin; 42 if (*op == MPIU_SUM) *op = MPI_SUM; 43 else if (*op == MPIU_MAX) *op = MPI_MAX; 44 else if (*op == MPIU_MIN) *op = MPI_MIN; 45 PetscFunctionReturn(0); 46 } 47 48 /*@C 49 PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank 50 51 Not Collective 52 53 Input Arguments: 54 + sf - star forest 55 - unit - data type for each node 56 57 Output Arguments: 58 + localtypes - types describing part of local leaf buffer referencing each remote rank 59 - remotetypes - types describing part of remote root buffer referenced for each remote rank 60 61 Level: developer 62 63 .seealso: PetscSFSetGraph(), PetscSFView() 64 @*/ 65 static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes) 66 { 67 PetscSF_Window *w = (PetscSF_Window*)sf->data; 68 PetscErrorCode ierr; 69 PetscSFDataLink link; 70 PetscInt i,nranks; 71 const PetscInt *roffset,*rmine,*rremote; 72 const PetscMPIInt *ranks; 73 74 PetscFunctionBegin; 75 /* Look for types in cache */ 76 for (link=w->link; link; link=link->next) { 77 PetscBool match; 78 ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 79 if (match) { 80 *localtypes = link->mine; 81 *remotetypes = link->remote; 82 PetscFunctionReturn(0); 83 } 84 } 85 86 /* Create new composite types for each send rank */ 87 ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); 88 ierr = PetscNew(&link);CHKERRQ(ierr); 89 ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr); 90 ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr); 91 for (i=0; i<nranks; i++) { 92 PetscInt rcount = roffset[i+1] - roffset[i]; 93 PetscMPIInt *rmine,*rremote; 94 #if !defined(PETSC_USE_64BIT_INDICES) 95 rmine = sf->rmine + sf->roffset[i]; 96 rremote = sf->rremote + sf->roffset[i]; 97 #else 98 PetscInt j; 99 ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr); 100 for (j=0; j<rcount; j++) { 101 ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr); 102 ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr); 103 } 104 #endif 105 106 ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr); 107 ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr); 108 #if defined(PETSC_USE_64BIT_INDICES) 109 ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr); 110 #endif 111 ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr); 112 ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr); 113 } 114 link->next = w->link; 115 w->link = link; 116 117 *localtypes = link->mine; 118 *remotetypes = link->remote; 119 PetscFunctionReturn(0); 120 } 121 122 /*@C 123 PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation 124 125 Logically Collective 126 127 Input Arguments: 128 + sf - star forest for communication 129 - flavor - flavor type 130 131 Options Database Key: 132 . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType) 133 134 Level: advanced 135 136 Notes: Windows reusage follow this rules: 137 138 PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create 139 140 PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g. 141 for i=1 to K 142 PetscSFOperationBegin(rootdata1,leafdata_whatever); 143 PetscSFOperationEnd(rootdata1,leafdata_whatever); 144 ... 145 PetscSFOperationBegin(rootdataN,leafdata_whatever); 146 PetscSFOperationEnd(rootdataN,leafdata_whatever); 147 endfor 148 The following pattern will instead raise an error 149 PetscSFOperationBegin(rootdata1,leafdata_whatever); 150 PetscSFOperationEnd(rootdata1,leafdata_whatever); 151 PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever); 152 PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever); 153 154 PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use 155 156 PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE 157 158 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType() 159 @*/ 160 PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor) 161 { 162 PetscErrorCode ierr; 163 164 PetscFunctionBegin; 165 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 166 PetscValidLogicalCollectiveEnum(sf,flavor,2); 167 ierr = PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor) 172 { 173 PetscSF_Window *w = (PetscSF_Window*)sf->data; 174 175 PetscFunctionBegin; 176 w->flavor = flavor; 177 PetscFunctionReturn(0); 178 } 179 180 /*@C 181 PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication 182 183 Logically Collective 184 185 Input Argument: 186 . sf - star forest for communication 187 188 Output Argument: 189 . flavor - flavor type 190 191 Level: advanced 192 193 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType() 194 @*/ 195 PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor) 196 { 197 PetscErrorCode ierr; 198 199 PetscFunctionBegin; 200 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 201 PetscValidPointer(flavor,2); 202 ierr = PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));CHKERRQ(ierr); 203 PetscFunctionReturn(0); 204 } 205 206 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor) 207 { 208 PetscSF_Window *w = (PetscSF_Window*)sf->data; 209 210 PetscFunctionBegin; 211 *flavor = w->flavor; 212 PetscFunctionReturn(0); 213 } 214 215 /*@C 216 PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication 217 218 Logically Collective 219 220 Input Arguments: 221 + sf - star forest for communication 222 - sync - synchronization type 223 224 Options Database Key: 225 . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType) 226 227 Level: advanced 228 229 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType() 230 @*/ 231 PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync) 232 { 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 237 PetscValidLogicalCollectiveEnum(sf,sync,2); 238 ierr = PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync) 243 { 244 PetscSF_Window *w = (PetscSF_Window*)sf->data; 245 246 PetscFunctionBegin; 247 w->sync = sync; 248 PetscFunctionReturn(0); 249 } 250 251 /*@C 252 PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication 253 254 Logically Collective 255 256 Input Argument: 257 . sf - star forest for communication 258 259 Output Argument: 260 . sync - synchronization type 261 262 Level: advanced 263 264 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType() 265 @*/ 266 PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync) 267 { 268 PetscErrorCode ierr; 269 270 PetscFunctionBegin; 271 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 272 PetscValidPointer(sync,2); 273 ierr = PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync) 278 { 279 PetscSF_Window *w = (PetscSF_Window*)sf->data; 280 281 PetscFunctionBegin; 282 *sync = w->sync; 283 PetscFunctionReturn(0); 284 } 285 286 /*@C 287 PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation 288 289 Logically Collective 290 291 Input Argument: 292 + sf - star forest for communication 293 - info - MPI_Info handle 294 295 Level: advanced 296 297 Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL. 298 299 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo() 300 @*/ 301 PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info) 302 { 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 307 ierr = PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 311 static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info) 312 { 313 PetscSF_Window *w = (PetscSF_Window*)sf->data; 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 if (w->info != MPI_INFO_NULL) { 318 ierr = MPI_Info_free(&w->info);CHKERRQ(ierr); 319 } 320 if (info != MPI_INFO_NULL) { 321 ierr = MPI_Info_dup(info,&w->info);CHKERRQ(ierr); 322 } 323 PetscFunctionReturn(0); 324 } 325 326 /*@C 327 PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation 328 329 Logically Collective 330 331 Input Argument: 332 . sf - star forest for communication 333 334 Output Argument: 335 . info - MPI_Info handle 336 337 Level: advanced 338 339 Notes: if PetscSFWindowSetInfo() has not be called, this returns MPI_INFO_NULL 340 341 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo() 342 @*/ 343 PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info) 344 { 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 349 PetscValidPointer(info,2); 350 ierr = PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 354 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info) 355 { 356 PetscSF_Window *w = (PetscSF_Window*)sf->data; 357 358 PetscFunctionBegin; 359 *info = w->info; 360 PetscFunctionReturn(0); 361 } 362 363 /* 364 PetscSFGetWindow - Get a window for use with a given data type 365 366 Collective on PetscSF 367 368 Input Arguments: 369 + sf - star forest 370 . unit - data type 371 . array - array to be sent 372 . sync - type of synchronization PetscSFWindowSyncType 373 . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window 374 . fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE 375 . postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE 376 - startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE 377 378 Output Arguments: 379 + target_disp - target_disp argument for RMA calls (significative for PETSCSF_WINDOW_FLAVOR_DYNAMIC only) 380 + reqs - array of requests (significative for sync == PETSCSF_WINDOW_SYNC_LOCK only) 381 - win - window 382 383 Level: developer 384 .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes() 385 */ 386 static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win) 387 { 388 PetscSF_Window *w = (PetscSF_Window*)sf->data; 389 PetscErrorCode ierr; 390 MPI_Aint lb,lb_true,bytes,bytes_true; 391 PetscSFWinLink link; 392 MPI_Aint winaddr; 393 PetscInt nranks; 394 PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE; 395 PetscBool dummy[2]; 396 MPI_Aint wsize; 397 398 PetscFunctionBegin; 399 ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRQ(ierr); 400 ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRQ(ierr); 401 if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature"); 402 if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature"); 403 if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE; 404 for (link=w->wins; reuse && link; link=link->next) { 405 PetscBool winok = PETSC_FALSE; 406 if (w->flavor != link->flavor) continue; 407 switch (w->flavor) { 408 case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */ 409 if (array == link->addr) { 410 if (PetscDefined(USE_DEBUG)) { 411 dummy[0] = PETSC_TRUE; 412 dummy[1] = PETSC_TRUE; 413 ierr = MPI_Allreduce(MPI_IN_PLACE,dummy ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 414 ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 415 if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead"); 416 } 417 if (link->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use"); 418 if (epoch && link->epoch) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished"); 419 winok = PETSC_TRUE; 420 link->paddr = array; 421 } else if (PetscDefined(USE_DEBUG)) { 422 dummy[0] = PETSC_FALSE; 423 dummy[1] = PETSC_FALSE; 424 ierr = MPI_Allreduce(MPI_IN_PLACE,dummy ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 425 ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 426 if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead"); 427 } 428 break; 429 case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */ 430 case PETSCSF_WINDOW_FLAVOR_SHARED: 431 if (!link->inuse && bytes == (MPI_Aint)link->bytes) { 432 update = PETSC_TRUE; 433 link->paddr = array; 434 winok = PETSC_TRUE; 435 } 436 break; 437 default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]); 438 } 439 if (winok) { 440 *win = link->win; 441 ierr = PetscInfo3(sf,"Reusing window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 442 goto found; 443 } 444 } 445 446 wsize = (MPI_Aint)bytes*sf->nroots; 447 ierr = PetscNew(&link);CHKERRQ(ierr); 448 link->bytes = bytes; 449 link->next = w->wins; 450 link->flavor = w->flavor; 451 link->dyn_target_addr = NULL; 452 link->reqs = NULL; 453 w->wins = link; 454 if (sync == PETSCSF_WINDOW_SYNC_LOCK) { 455 PetscInt i; 456 457 ierr = PetscMalloc1(sf->nranks,&link->reqs);CHKERRQ(ierr); 458 for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL; 459 } 460 switch (w->flavor) { 461 case PETSCSF_WINDOW_FLAVOR_CREATE: 462 ierr = MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr); 463 link->addr = array; 464 link->paddr = array; 465 break; 466 case PETSCSF_WINDOW_FLAVOR_DYNAMIC: 467 ierr = MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr); 468 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */ 469 ierr = MPI_Win_attach(link->win,wsize ? array : &ierr,wsize);CHKERRQ(ierr); 470 #else 471 ierr = MPI_Win_attach(link->win,array,wsize);CHKERRQ(ierr); 472 #endif 473 link->addr = array; 474 link->paddr = array; 475 if (!w->dynsf) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()"); 476 ierr = PetscSFSetUp(w->dynsf);CHKERRQ(ierr); 477 ierr = PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 478 ierr = PetscMalloc1(nranks,&link->dyn_target_addr);CHKERRQ(ierr); 479 ierr = MPI_Get_address(array,&winaddr);CHKERRQ(ierr); 480 ierr = PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr);CHKERRQ(ierr); 481 ierr = PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr);CHKERRQ(ierr); 482 break; 483 case PETSCSF_WINDOW_FLAVOR_ALLOCATE: 484 ierr = MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRQ(ierr); 485 update = PETSC_TRUE; 486 link->paddr = array; 487 break; 488 #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) 489 case PETSCSF_WINDOW_FLAVOR_SHARED: 490 ierr = MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRQ(ierr); 491 update = PETSC_TRUE; 492 link->paddr = array; 493 break; 494 #endif 495 default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]); 496 } 497 ierr = PetscInfo3(sf,"New window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 498 *win = link->win; 499 500 found: 501 502 if (target_disp) *target_disp = link->dyn_target_addr; 503 if (reqs) *reqs = link->reqs; 504 if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */ 505 PetscMPIInt rank; 506 507 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 508 if (sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);CHKERRQ(ierr); } 509 ierr = PetscMemcpy(link->addr,array,sf->nroots*bytes);CHKERRQ(ierr); 510 if (sync == PETSCSF_WINDOW_SYNC_LOCK) { 511 ierr = MPI_Win_unlock(rank,*win);CHKERRQ(ierr); 512 ierr = MPI_Win_fence(0,*win);CHKERRQ(ierr); 513 } 514 } 515 link->inuse = PETSC_TRUE; 516 link->epoch = epoch; 517 if (epoch) { 518 switch (sync) { 519 case PETSCSF_WINDOW_SYNC_FENCE: 520 ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr); 521 break; 522 case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ 523 break; 524 case PETSCSF_WINDOW_SYNC_ACTIVE: { 525 MPI_Group ingroup,outgroup; 526 PetscMPIInt isize,osize; 527 528 /* OpenMPI 4.0.2 with btl=vader does not like calling 529 - MPI_Win_complete when ogroup is empty 530 - MPI_Win_wait when igroup is empty 531 So, we do not even issue the corresponding start and post calls 532 The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that 533 start(outgroup) has a matching post(ingroup) 534 and this is guaranteed by PetscSF 535 */ 536 ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr); 537 ierr = MPI_Group_size(ingroup,&isize);CHKERRQ(ierr); 538 ierr = MPI_Group_size(outgroup,&osize);CHKERRQ(ierr); 539 if (isize) { ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRQ(ierr); } 540 if (osize) { ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRQ(ierr); } 541 } break; 542 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type"); 543 } 544 } 545 PetscFunctionReturn(0); 546 } 547 548 /* 549 PetscSFFindWindow - Finds a window that is already in use 550 551 Not Collective 552 553 Input Arguments: 554 + sf - star forest 555 . unit - data type 556 - array - array with which the window is associated 557 558 Output Arguments: 559 + win - window 560 - reqs - outstanding requests associated to the window 561 562 Level: developer 563 564 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 565 */ 566 static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs) 567 { 568 PetscSF_Window *w = (PetscSF_Window*)sf->data; 569 PetscSFWinLink link; 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 *win = MPI_WIN_NULL; 574 for (link=w->wins; link; link=link->next) { 575 if (array == link->paddr) { 576 ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 577 *win = link->win; 578 *reqs = link->reqs; 579 PetscFunctionReturn(0); 580 } 581 } 582 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use"); 583 PetscFunctionReturn(0); 584 } 585 586 /* 587 PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow() 588 589 Collective 590 591 Input Arguments: 592 + sf - star forest 593 . unit - data type 594 . array - array associated with window 595 . sync - type of synchronization PetscSFWindowSyncType 596 . epoch - close an epoch, must match argument to PetscSFGetWindow() 597 . update - if we have to update the local window array 598 - win - window 599 600 Level: developer 601 602 .seealso: PetscSFFindWindow() 603 */ 604 static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win) 605 { 606 PetscSF_Window *w = (PetscSF_Window*)sf->data; 607 PetscErrorCode ierr; 608 PetscSFWinLink *p,link; 609 PetscBool reuse = PETSC_FALSE; 610 PetscSFWindowFlavorType flavor; 611 void* laddr; 612 size_t bytes; 613 614 PetscFunctionBegin; 615 for (p=&w->wins; *p; p=&(*p)->next) { 616 link = *p; 617 if (*win == link->win) { 618 if (array != link->paddr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array"); 619 if (epoch != link->epoch) { 620 if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end"); 621 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch"); 622 } 623 laddr = link->addr; 624 flavor = link->flavor; 625 bytes = link->bytes; 626 if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE; 627 else { *p = link->next; update = PETSC_FALSE; } /* remove from list */ 628 goto found; 629 } 630 } 631 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use"); 632 633 found: 634 ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 635 if (epoch) { 636 switch (sync) { 637 case PETSCSF_WINDOW_SYNC_FENCE: 638 ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr); 639 break; 640 case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ 641 break; 642 case PETSCSF_WINDOW_SYNC_ACTIVE: { 643 MPI_Group ingroup,outgroup; 644 PetscMPIInt isize,osize; 645 646 /* OpenMPI 4.0.2 with btl=wader does not like calling 647 - MPI_Win_complete when ogroup is empty 648 - MPI_Win_wait when igroup is empty 649 The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that 650 - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete 651 - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait 652 */ 653 ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr); 654 ierr = MPI_Group_size(ingroup,&isize);CHKERRQ(ierr); 655 ierr = MPI_Group_size(outgroup,&osize);CHKERRQ(ierr); 656 if (osize) { ierr = MPI_Win_complete(*win);CHKERRQ(ierr); } 657 if (isize) { ierr = MPI_Win_wait(*win);CHKERRQ(ierr); } 658 } break; 659 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type"); 660 } 661 } 662 if (update) { 663 if (sync == PETSCSF_WINDOW_SYNC_LOCK) { 664 ierr = MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);CHKERRQ(ierr); 665 } 666 ierr = PetscMemcpy(array,laddr,sf->nroots*bytes);CHKERRQ(ierr); 667 } 668 link->epoch = PETSC_FALSE; 669 link->inuse = PETSC_FALSE; 670 link->paddr = NULL; 671 if (!reuse) { 672 ierr = PetscFree(link->dyn_target_addr);CHKERRQ(ierr); 673 ierr = PetscFree(link->reqs);CHKERRQ(ierr); 674 ierr = MPI_Win_free(&link->win);CHKERRQ(ierr); 675 ierr = PetscFree(link);CHKERRQ(ierr); 676 *win = MPI_WIN_NULL; 677 } 678 PetscFunctionReturn(0); 679 } 680 681 static PetscErrorCode PetscSFSetUp_Window(PetscSF sf) 682 { 683 PetscSF_Window *w = (PetscSF_Window*)sf->data; 684 PetscErrorCode ierr; 685 MPI_Group ingroup,outgroup; 686 687 PetscFunctionBegin; 688 ierr = PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);CHKERRQ(ierr); 689 if (!w->dynsf) { 690 PetscInt i; 691 PetscSFNode *remotes; 692 693 ierr = PetscMalloc1(sf->nranks,&remotes);CHKERRQ(ierr); 694 for (i=0;i<sf->nranks;i++) { 695 remotes[i].rank = sf->ranks[i]; 696 remotes[i].index = 0; 697 } 698 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);CHKERRQ(ierr); 699 ierr = PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE);CHKERRQ(ierr); /* break recursion */ 700 ierr = PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);CHKERRQ(ierr); 701 ierr = PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);CHKERRQ(ierr); 702 } 703 switch (w->sync) { 704 case PETSCSF_WINDOW_SYNC_ACTIVE: 705 ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr); 706 default: 707 break; 708 } 709 PetscFunctionReturn(0); 710 } 711 712 static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf) 713 { 714 PetscSF_Window *w = (PetscSF_Window*)sf->data; 715 PetscErrorCode ierr; 716 PetscSFWindowFlavorType flavor = w->flavor; 717 718 PetscFunctionBegin; 719 ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");CHKERRQ(ierr); 720 ierr = PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);CHKERRQ(ierr); 721 ierr = PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);CHKERRQ(ierr); 722 ierr = PetscSFWindowSetFlavorType(sf,flavor);CHKERRQ(ierr); 723 ierr = PetscOptionsTail();CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 static PetscErrorCode PetscSFReset_Window(PetscSF sf) 728 { 729 PetscSF_Window *w = (PetscSF_Window*)sf->data; 730 PetscErrorCode ierr; 731 PetscSFDataLink link,next; 732 PetscSFWinLink wlink,wnext; 733 PetscInt i; 734 735 PetscFunctionBegin; 736 for (link=w->link; link; link=next) { 737 next = link->next; 738 ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr); 739 for (i=0; i<sf->nranks; i++) { 740 ierr = MPI_Type_free(&link->mine[i]);CHKERRQ(ierr); 741 ierr = MPI_Type_free(&link->remote[i]);CHKERRQ(ierr); 742 } 743 ierr = PetscFree2(link->mine,link->remote);CHKERRQ(ierr); 744 ierr = PetscFree(link);CHKERRQ(ierr); 745 } 746 w->link = NULL; 747 for (wlink=w->wins; wlink; wlink=wnext) { 748 wnext = wlink->next; 749 if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr); 750 ierr = PetscFree(wlink->dyn_target_addr);CHKERRQ(ierr); 751 ierr = PetscFree(wlink->reqs);CHKERRQ(ierr); 752 ierr = MPI_Win_free(&wlink->win);CHKERRQ(ierr); 753 ierr = PetscFree(wlink);CHKERRQ(ierr); 754 } 755 w->wins = NULL; 756 ierr = PetscSFDestroy(&w->dynsf);CHKERRQ(ierr); 757 if (w->info != MPI_INFO_NULL) { 758 ierr = MPI_Info_free(&w->info);CHKERRQ(ierr); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf) 764 { 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 ierr = PetscSFReset_Window(sf);CHKERRQ(ierr); 769 ierr = PetscFree(sf->data);CHKERRQ(ierr); 770 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);CHKERRQ(ierr); 771 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);CHKERRQ(ierr); 772 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);CHKERRQ(ierr); 773 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);CHKERRQ(ierr); 774 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);CHKERRQ(ierr); 775 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer) 780 { 781 PetscSF_Window *w = (PetscSF_Window*)sf->data; 782 PetscErrorCode ierr; 783 PetscBool iascii; 784 PetscViewerFormat format; 785 786 PetscFunctionBegin; 787 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 788 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 789 if (iascii) { 790 ierr = PetscViewerASCIIPrintf(viewer," current flavor=%s synchronization=%s sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr); 791 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 792 if (w->info != MPI_INFO_NULL) { 793 PetscMPIInt k,nkeys; 794 char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL]; 795 796 ierr = MPI_Info_get_nkeys(w->info,&nkeys);CHKERRQ(ierr); 797 ierr = PetscViewerASCIIPrintf(viewer," current info with %d keys. Ordered key-value pairs follow:\n",nkeys);CHKERRQ(ierr); 798 for (k = 0; k < nkeys; k++) { 799 PetscMPIInt flag; 800 801 ierr = MPI_Info_get_nthkey(w->info,k,key);CHKERRQ(ierr); 802 ierr = MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);CHKERRQ(ierr); 803 if (!flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key); 804 ierr = PetscViewerASCIIPrintf(viewer," %s = %s\n",key,value);CHKERRQ(ierr); 805 } 806 } else { 807 ierr = PetscViewerASCIIPrintf(viewer," current info=MPI_INFO_NULL\n");CHKERRQ(ierr); 808 } 809 } 810 } 811 PetscFunctionReturn(0); 812 } 813 814 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf) 815 { 816 PetscSF_Window *w = (PetscSF_Window*)sf->data; 817 PetscErrorCode ierr; 818 PetscSFWindowSyncType synctype; 819 820 PetscFunctionBegin; 821 synctype = w->sync; 822 /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */ 823 if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK; 824 ierr = PetscSFWindowSetSyncType(newsf,synctype);CHKERRQ(ierr); 825 ierr = PetscSFWindowSetFlavorType(newsf,w->flavor);CHKERRQ(ierr); 826 ierr = PetscSFWindowSetInfo(newsf,w->info);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 static PetscErrorCode PetscSFBcastAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 831 { 832 PetscSF_Window *w = (PetscSF_Window*)sf->data; 833 PetscErrorCode ierr; 834 PetscInt i,nranks; 835 const PetscMPIInt *ranks; 836 const MPI_Aint *target_disp; 837 const MPI_Datatype *mine,*remote; 838 MPI_Request *reqs; 839 MPI_Win win; 840 841 PetscFunctionBegin; 842 if (op != MPI_REPLACE || op != MPIU_REPLACE) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastAndOpBegin_Window with op!=MPI_REPLACE has not been implemented"); 843 ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr); 844 ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr); 845 ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);CHKERRQ(ierr); 846 for (i=0; i<nranks; i++) { 847 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 848 849 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 850 ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr); 851 #if defined(PETSC_HAVE_MPI_RGET) 852 ierr = MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);CHKERRQ(ierr); 853 #else 854 ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr); 855 #endif 856 } else { 857 ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr); 858 } 859 } 860 PetscFunctionReturn(0); 861 } 862 863 PetscErrorCode PetscSFBcastAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 864 { 865 PetscSF_Window *w = (PetscSF_Window*)sf->data; 866 PetscErrorCode ierr; 867 MPI_Win win; 868 MPI_Request *reqs = NULL; 869 870 PetscFunctionBegin; 871 ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr); 872 if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); } 873 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { 874 PetscInt i,nranks; 875 const PetscMPIInt *ranks; 876 877 ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr); 878 for (i=0; i<nranks; i++) { 879 ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr); 880 } 881 } 882 ierr = PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 887 { 888 PetscSF_Window *w = (PetscSF_Window*)sf->data; 889 PetscErrorCode ierr; 890 PetscInt i,nranks; 891 const PetscMPIInt *ranks; 892 const MPI_Aint *target_disp; 893 const MPI_Datatype *mine,*remote; 894 MPI_Win win; 895 896 PetscFunctionBegin; 897 ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr); 898 ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr); 899 ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr); 900 ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr); 901 for (i=0; i<nranks; i++) { 902 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 903 904 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);} 905 ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win); 906 if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */ 907 PetscMPIInt len; 908 char errstring[MPI_MAX_ERROR_STRING]; 909 910 MPI_Error_string(ierr,errstring,&len); 911 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring); 912 } 913 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);} 914 } 915 PetscFunctionReturn(0); 916 } 917 918 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 919 { 920 PetscSF_Window *w = (PetscSF_Window*)sf->data; 921 PetscErrorCode ierr; 922 MPI_Win win; 923 MPI_Request *reqs = NULL; 924 925 PetscFunctionBegin; 926 ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr); 927 if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); } 928 ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931 932 static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 933 { 934 PetscErrorCode ierr; 935 PetscInt i,nranks; 936 const PetscMPIInt *ranks; 937 const MPI_Datatype *mine,*remote; 938 const MPI_Aint *target_disp; 939 MPI_Win win; 940 PetscSF_Window *w = (PetscSF_Window*)sf->data; 941 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 942 PetscSFWindowFlavorType oldf; 943 #endif 944 945 PetscFunctionBegin; 946 ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr); 947 ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr); 948 ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr); 949 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 950 /* FetchAndOp without MPI_Get_Accumulate requires locking. 951 we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */ 952 oldf = w->flavor; 953 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 954 ierr = PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);CHKERRQ(ierr); 955 #else 956 ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr); 957 #endif 958 for (i=0; i<nranks; i++) { 959 MPI_Aint tdp = target_disp ? target_disp[i] : 0; 960 961 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 962 ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);CHKERRQ(ierr); 963 ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr); 964 ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win); 965 if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */ 966 PetscMPIInt len; 967 char errstring[MPI_MAX_ERROR_STRING]; 968 969 MPI_Error_string(ierr,errstring,&len); 970 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring); 971 } 972 ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr); 973 #else 974 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);CHKERRQ(ierr); } 975 ierr = MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win); 976 if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */ 977 PetscMPIInt len; 978 char errstring[MPI_MAX_ERROR_STRING]; 979 980 MPI_Error_string(ierr,errstring,&len); 981 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring); 982 } 983 if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr); } 984 #endif 985 } 986 #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 987 w->flavor = oldf; 988 #endif 989 PetscFunctionReturn(0); 990 } 991 992 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 993 { 994 PetscErrorCode ierr; 995 MPI_Win win; 996 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 997 PetscSF_Window *w = (PetscSF_Window*)sf->data; 998 #endif 999 MPI_Request *reqs = NULL; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr); 1003 if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); } 1004 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE) 1005 ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr); 1006 #else 1007 ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);CHKERRQ(ierr); 1008 #endif 1009 PetscFunctionReturn(0); 1010 } 1011 1012 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf) 1013 { 1014 PetscSF_Window *w = (PetscSF_Window*)sf->data; 1015 PetscErrorCode ierr; 1016 1017 PetscFunctionBegin; 1018 sf->ops->SetUp = PetscSFSetUp_Window; 1019 sf->ops->SetFromOptions = PetscSFSetFromOptions_Window; 1020 sf->ops->Reset = PetscSFReset_Window; 1021 sf->ops->Destroy = PetscSFDestroy_Window; 1022 sf->ops->View = PetscSFView_Window; 1023 sf->ops->Duplicate = PetscSFDuplicate_Window; 1024 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Window; 1025 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Window; 1026 sf->ops->ReduceBegin = PetscSFReduceBegin_Window; 1027 sf->ops->ReduceEnd = PetscSFReduceEnd_Window; 1028 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window; 1029 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window; 1030 1031 ierr = PetscNewLog(sf,&w);CHKERRQ(ierr); 1032 sf->data = (void*)w; 1033 w->sync = PETSCSF_WINDOW_SYNC_FENCE; 1034 w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE; 1035 w->info = MPI_INFO_NULL; 1036 1037 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);CHKERRQ(ierr); 1038 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);CHKERRQ(ierr); 1039 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);CHKERRQ(ierr); 1040 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);CHKERRQ(ierr); 1041 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);CHKERRQ(ierr); 1042 ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);CHKERRQ(ierr); 1043 1044 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6)) 1045 { 1046 PetscBool ackbug = PETSC_FALSE; 1047 ierr = PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);CHKERRQ(ierr); 1048 if (ackbug) { 1049 ierr = PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");CHKERRQ(ierr); 1050 } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed"); 1051 } 1052 #endif 1053 PetscFunctionReturn(0); 1054 } 1055