xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision 24ded41b4e3afbef0dd5eaa1b3d8dd0172f6dba2)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
3 typedef struct _n_PetscSFDataLink *PetscSFDataLink;
4 typedef struct _n_PetscSFWinLink  *PetscSFWinLink;
5 
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* Helper SF (one root per connected rank) used to broadcast window base addresses for the DYNAMIC flavor */
  MPI_Info                info;   /* Info passed to window creation calls; MPI_INFO_NULL unless set with PetscSFWindowSetInfo() */
} PetscSF_Window;
14 
/* One cache entry of composite MPI datatypes built for a given unit type by
   PetscSFWindowGetDataTypes(); entries form a linked list keyed by 'unit' */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;    /* unit type this entry was built for (stored as an MPI_Type_dup copy) */
  MPI_Datatype    *mine;   /* per-rank indexed types into the local leaf buffer */
  MPI_Datatype    *remote; /* per-rank indexed types into the remote root buffer */
  PetscSFDataLink next;    /* next cache entry */
};
21 
/* One entry in the list of windows managed by this SF; depending on the flavor,
   entries may be reused across communication rounds (see PetscSFGetWindow()) */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* true while checked out via PetscSFGetWindow() */
  size_t                  bytes;           /* extent in bytes of one unit of the datatype the window was created for */
  void                    *addr;           /* local window memory (the user root array, or an MPI-allocated buffer) */
  void                    *paddr;          /* user array currently associated with this window */
  MPI_Win                 win;             /* the MPI window itself */
  MPI_Request             *reqs;           /* per-rank requests; allocated for PETSCSF_WINDOW_SYNC_LOCK only */
  PetscSFWindowFlavorType flavor;          /* flavor the window was created with */
  MPI_Aint                *dyn_target_addr;/* per-rank target displacements; DYNAMIC flavor only, otherwise NULL */
  PetscBool               epoch;           /* true while an access/exposure epoch is open on this window */
  PetscSFWinLink          next;            /* next window in the list */
};
34 
/* String names for the enums, used by options processing and viewers; the trailing
   entries (type name, option prefix, NULL) follow the PetscOptionsEnum() convention */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40 {
41   PetscFunctionBegin;
42   if (*op == MPIU_SUM) *op = MPI_SUM;
43   else if (*op == MPIU_MAX) *op = MPI_MAX;
44   else if (*op == MPIU_MIN) *op = MPI_MIN;
45   PetscFunctionReturn(0);
46 }
47 
/*@C
   PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

   Not Collective

   Input Parameters:
+  sf - star forest
-  unit - data type for each node

   Output Parameters:
+  localtypes - types describing part of local leaf buffer referencing each remote rank
-  remotetypes - types describing part of remote root buffer referenced for each remote rank

   Notes:
   The types are cached on the SF keyed by unit, so repeated calls with the same unit are cheap.
   The returned arrays are owned by the SF and freed in PetscSFReset_Window().

   Level: developer

.seealso: `PetscSFSetGraph()`, `PetscSFView()`
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window*)sf->data;
  PetscSFDataLink   link;
  PetscInt          i,nranks;
  const PetscInt    *roffset,*rmine,*rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link=w->link; link; link=link->next) {
    PetscBool match;
    PetscCall(MPIPetsc_Type_compare(unit,link->unit,&match));
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(0);
    }
  }

  /* Create new composite types for each send rank */
  PetscCall(PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote));
  PetscCall(PetscNew(&link));
  PetscCallMPI(MPI_Type_dup(unit,&link->unit));
  PetscCall(PetscMalloc2(nranks,&link->mine,nranks,&link->remote));
  for (i=0; i<nranks; i++) {
    PetscInt    rcount = roffset[i+1] - roffset[i];
    PetscMPIInt *rmine,*rremote; /* NOTE(review): shadows the const PetscInt pointers above, which are otherwise unused */
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt and PetscMPIInt coincide here, so the SF index arrays can be used in place */
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* MPI type constructors take int displacements; copy through PetscMPIIntCast into temporaries */
    PetscInt j;
    PetscCall(PetscMalloc2(rcount,&rmine,rcount,&rremote));
    for (j=0; j<rcount; j++) {
      PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j));
      PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j));
    }
#endif

    PetscCallMPI(MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]));
    PetscCallMPI(MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]));
#if defined(PETSC_USE_64BIT_INDICES)
    PetscCall(PetscFree2(rmine,rremote));
#endif
    PetscCallMPI(MPI_Type_commit(&link->mine[i]));
    PetscCallMPI(MPI_Type_commit(&link->remote[i]));
  }
  /* Prepend the new entry to the cache list */
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(0);
}
120 
/*@C
   PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  flavor - flavor type

   Options Database Key:
.  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)

   Level: advanced

   Notes: Window reusage follows these rules:

     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       for i=1 to K
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         ...
         PetscSFOperationBegin(rootdataN,leafdata_whatever);
         PetscSFOperationEnd(rootdataN,leafdata_whatever);
       endfor
       The following pattern will instead raise an error
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
         PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,flavor,2);
  /* no-op if the SF type does not compose this method (i.e. is not a window SF) */
  PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));
  PetscFunctionReturn(0);
}
167 
168 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
169 {
170   PetscSF_Window *w = (PetscSF_Window*)sf->data;
171 
172   PetscFunctionBegin;
173   w->flavor = flavor;
174   PetscFunctionReturn(0);
175 }
176 
/*@C
   PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  flavor - flavor type

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(flavor,2);
  /* errors if the SF type does not compose this method (unlike PetscTryMethod) */
  PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));
  PetscFunctionReturn(0);
}
200 
201 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
202 {
203   PetscSF_Window *w = (PetscSF_Window*)sf->data;
204 
205   PetscFunctionBegin;
206   *flavor = w->flavor;
207   PetscFunctionReturn(0);
208 }
209 
/*@C
   PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  sync - synchronization type

   Options Database Key:
.  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,sync,2);
  /* no-op if the SF type does not compose this method (i.e. is not a window SF) */
  PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));
  PetscFunctionReturn(0);
}
234 
235 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
236 {
237   PetscSF_Window *w = (PetscSF_Window*)sf->data;
238 
239   PetscFunctionBegin;
240   w->sync = sync;
241   PetscFunctionReturn(0);
242 }
243 
/*@C
   PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  sync - synchronization type

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(sync,2);
  /* errors if the SF type does not compose this method (unlike PetscTryMethod) */
  PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));
  PetscFunctionReturn(0);
}
267 
268 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
269 {
270   PetscSF_Window *w = (PetscSF_Window*)sf->data;
271 
272   PetscFunctionBegin;
273   *sync = w->sync;
274   PetscFunctionReturn(0);
275 }
276 
/*@C
   PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  info - MPI_Info handle

   Level: advanced

   Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* no-op if the SF type does not compose this method (i.e. is not a window SF) */
  PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));
  PetscFunctionReturn(0);
}
299 
300 static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
301 {
302   PetscSF_Window *w = (PetscSF_Window*)sf->data;
303 
304   PetscFunctionBegin;
305   if (w->info != MPI_INFO_NULL) {
306     PetscCallMPI(MPI_Info_free(&w->info));
307   }
308   if (info != MPI_INFO_NULL) {
309     PetscCallMPI(MPI_Info_dup(info,&w->info));
310   }
311   PetscFunctionReturn(0);
312 }
313 
/*@C
   PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  info - MPI_Info handle

   Level: advanced

   Notes: if PetscSFWindowSetInfo() has not be called, this returns MPI_INFO_NULL

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(info,2);
  /* errors if the SF type does not compose this method (unlike PetscTryMethod) */
  PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));
  PetscFunctionReturn(0);
}
339 
340 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
341 {
342   PetscSF_Window *w = (PetscSF_Window*)sf->data;
343 
344   PetscFunctionBegin;
345   *info = w->info;
346   PetscFunctionReturn(0);
347 }
348 
/*
   PetscSFGetWindow - Get a window for use with a given data type

   Collective on PetscSF

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array to be sent
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
-  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE

   Output Parameters:
+  target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
.  reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
-  win - window

   Level: developer
.seealso: `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  MPI_Aint       lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint       winaddr;
  PetscInt       nranks;
#endif
  PetscBool      reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool      dummy[2];
  MPI_Aint       wsize;

  PetscFunctionBegin;
  /* Reject unit types with a nonzero lower bound or a resized extent */
  PetscCallMPI(MPI_Type_get_extent(unit,&lb,&bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit,&lb_true,&bytes_true));
  PetscCheck(lb == 0 && lb_true == 0,PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  PetscCheck(bytes == bytes_true,PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* Every flavor except CREATE may reuse a previously created window */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link=w->wins; reuse && link; link=link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* collective sanity check: all ranks must agree on whether the array matched */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,dummy,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf)));
          PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf)));
          PetscCheck(dummy[0] == dummy[1],PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        PetscCheck(!link->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use");
        PetscCheck(!epoch || !link->epoch,PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished");
        winok = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* this rank did not match; participate in the same collective check with "no match" */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,dummy  ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf)));
        PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf)));
        PetscCheck(dummy[0] == dummy[1],PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update = PETSC_TRUE; /* window memory is MPI-allocated, so the user array must be copied in */
        link->paddr = array;
        winok = PETSC_TRUE;
      }
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscCall(PetscInfo(sf,"Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf)));
      goto found;
    }
  }

  /* No reusable window: create a new one and prepend it to the list */
  wsize = (MPI_Aint)bytes*sf->nroots; /* size in bytes of the local root array */
  PetscCall(PetscNew(&link));
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscCall(PetscMalloc1(sf->nranks,&link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    PetscCallMPI(MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win));
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    PetscCallMPI(MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win));
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    PetscCallMPI(MPI_Win_attach(link->win,wsize ? array : (void*)dummy,wsize));
#else
    PetscCallMPI(MPI_Win_attach(link->win,array,wsize));
#endif
    link->addr  = array;
    link->paddr = array;
    PetscCheck(w->dynsf,PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()");
    /* Broadcast each root rank's attached base address to its leaves; these
       become the target displacements for subsequent RMA calls */
    PetscCall(PetscSFSetUp(w->dynsf));
    PetscCall(PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL));
    PetscCall(PetscMalloc1(nranks,&link->dyn_target_addr));
    PetscCallMPI(MPI_Get_address(array,&winaddr));
    PetscCall(PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE));
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    PetscCallMPI(MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win));
    update = PETSC_TRUE; /* MPI owns the memory; user data must be copied into link->addr below */
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    PetscCallMPI(MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win));
    update = PETSC_TRUE; /* MPI owns the memory; user data must be copied into link->addr below */
    link->paddr = array;
    break;
#endif
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscCall(PetscInfo(sf,"New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf)));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
    PetscMPIInt rank;

    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win));
    PetscCall(PetscMemcpy(link->addr,array,sf->nroots*bytes));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_unlock(rank,*win));
      PetscCallMPI(MPI_Win_fence(0,*win));
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    /* Open the requested epoch type on the (new or reused) window */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert,*win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscCall(PetscSFGetGroups(sf,&ingroup,&outgroup));
      PetscCallMPI(MPI_Group_size(ingroup,&isize));
      PetscCallMPI(MPI_Group_size(outgroup,&osize));
      if (isize) PetscCallMPI(MPI_Win_post(ingroup,postassert,*win));
      if (osize) PetscCallMPI(MPI_Win_start(outgroup,startassert,*win));
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
536 
537 /*
538    PetscSFFindWindow - Finds a window that is already in use
539 
540    Not Collective
541 
542    Input Parameters:
543 +  sf - star forest
544 .  unit - data type
545 -  array - array with which the window is associated
546 
547    Output Parameters:
548 +  win - window
549 -  reqs - outstanding requests associated to the window
550 
551    Level: developer
552 
553 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
554 */
555 static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
556 {
557   PetscSF_Window *w = (PetscSF_Window*)sf->data;
558   PetscSFWinLink link;
559 
560   PetscFunctionBegin;
561   *win = MPI_WIN_NULL;
562   for (link=w->wins; link; link=link->next) {
563     if (array == link->paddr) {
564 
565       PetscCall(PetscInfo(sf,"Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf)));
566       *win = link->win;
567       *reqs = link->reqs;
568       PetscFunctionReturn(0);
569     }
570   }
571   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
572 }
573 
/*
   PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array associated with window
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - close an epoch, must match argument to PetscSFGetWindow()
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  update - if we have to update the local window array
-  win - window

   Level: developer

.seealso: `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscSFWinLink          *p,link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void*                   laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Walk the list with a pointer-to-pointer so a CREATE-flavor entry can be unlinked in place */
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      PetscCheck(array == link->paddr,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        PetscCheck(!epoch,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      laddr = link->addr;
      flavor = link->flavor;
      bytes = link->bytes;
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  PetscCall(PetscInfo(sf,"Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf)));
  if (epoch) {
    /* Close the epoch opened by PetscSFGetWindow() */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert,*win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=wader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscCall(PetscSFGetGroups(sf,&ingroup,&outgroup));
      PetscCallMPI(MPI_Group_size(ingroup,&isize));
      PetscCallMPI(MPI_Group_size(outgroup,&osize));
      if (osize) PetscCallMPI(MPI_Win_complete(*win));
      if (isize) PetscCallMPI(MPI_Win_wait(*win));
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  if (update) {
    /* Copy the (MPI-allocated) window memory back into the user array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win));
    }
    PetscCall(PetscMemcpy(array,laddr,sf->nroots*bytes));
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    /* CREATE-flavor windows are never reused: free everything now */
    PetscCall(PetscFree(link->dyn_target_addr));
    PetscCall(PetscFree(link->reqs));
    PetscCallMPI(MPI_Win_free(&link->win));
    PetscCall(PetscFree(link));
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(0);
}
667 
668 static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
669 {
670   PetscSF_Window *w = (PetscSF_Window*)sf->data;
671   MPI_Group      ingroup,outgroup;
672 
673   PetscFunctionBegin;
674   PetscCall(PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY));
675   if (!w->dynsf) {
676     PetscInt    i;
677     PetscSFNode *remotes;
678 
679     PetscCall(PetscMalloc1(sf->nranks,&remotes));
680     for (i=0;i<sf->nranks;i++) {
681       remotes[i].rank  = sf->ranks[i];
682       remotes[i].index = 0;
683     }
684     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf));
685     PetscCall(PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
686     PetscCall(PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER));
687     PetscCall(PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf));
688   }
689   switch (w->sync) {
690   case PETSCSF_WINDOW_SYNC_ACTIVE:
691     PetscCall(PetscSFGetGroups(sf,&ingroup,&outgroup));
692   default:
693     break;
694   }
695   PetscFunctionReturn(0);
696 }
697 
/* Process command-line options for the window SF: -sf_window_sync and -sf_window_flavor */
static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor; /* read into a temporary so the value is applied through the setter below */

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"PetscSF Window options");
  PetscCall(PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL));
  PetscCall(PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL));
  PetscCall(PetscSFWindowSetFlavorType(sf,flavor));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
711 
/* Release all cached state of the window SF: the datatype cache, the window list,
   the dynamic-address helper SF, and the stored MPI_Info */
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window  *w = (PetscSF_Window*)sf->data;
  PetscSFDataLink link,next;
  PetscSFWinLink  wlink,wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free every cached composite datatype entry */
  for (link=w->link; link; link=next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i=0; i<sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine,link->remote));
    PetscCall(PetscFree(link));
  }
  w->link = NULL;
  /* Free every window; erroring on any window still checked out by a caller */
  for (wlink=w->wins; wlink; wlink=wnext) {
    wnext = wlink->next;
    PetscCheck(!wlink->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
    PetscCall(PetscFree(wlink->dyn_target_addr));
    PetscCall(PetscFree(wlink->reqs));
    PetscCallMPI(MPI_Win_free(&wlink->win));
    PetscCall(PetscFree(wlink));
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) {
    PetscCallMPI(MPI_Info_free(&w->info));
  }
  PetscFunctionReturn(0);
}
746 
/* Destroy the window SF: reset cached state, free the implementation struct,
   and remove the type-specific methods composed on the object */
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Window(sf));
  PetscCall(PetscFree(sf->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL));
  PetscFunctionReturn(0);
}
760 
/* ASCII viewer for the window SF: print flavor and sync type; with the
   INFO_DETAIL format, also print the MPI_Info key/value pairs */
static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
{
  PetscSF_Window    *w = (PetscSF_Window*)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscViewerGetFormat(viewer,&format));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer,"  current flavor=%s synchronization=%s MultiSF sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered"));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k,nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        PetscCallMPI(MPI_Info_get_nkeys(w->info,&nkeys));
        PetscCall(PetscViewerASCIIPrintf(viewer,"    current info with %d keys. Ordered key-value pairs follow:\n",nkeys));
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          PetscCallMPI(MPI_Info_get_nthkey(w->info,k,key));
          PetscCallMPI(MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag));
          /* flag is false only if the key has no value, which cannot happen for a key we just enumerated */
          PetscCheck(flag,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key);
          PetscCall(PetscViewerASCIIPrintf(viewer,"      %s = %s\n",key,value));
        }
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"    current info=MPI_INFO_NULL\n"));
      }
    }
  }
  PetscFunctionReturn(0);
}
794 
795 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
796 {
797   PetscSF_Window        *w = (PetscSF_Window*)sf->data;
798   PetscSFWindowSyncType synctype;
799 
800   PetscFunctionBegin;
801   synctype = w->sync;
802   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
803   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
804   PetscCall(PetscSFWindowSetSyncType(newsf,synctype));
805   PetscCall(PetscSFWindowSetFlavorType(newsf,w->flavor));
806   PetscCall(PetscSFWindowSetInfo(newsf,w->info));
807   PetscFunctionReturn(0);
808 }
809 
/* Start a root-to-leaf broadcast: each process issues a one-sided MPI_Get (or
   MPI_Rget where available) from every root rank's exposed window into its own
   leafdata. Only op == MPI_REPLACE is supported. The matching completion is
   PetscSFBcastEnd_Window(), which waits on any requests and closes the epoch. */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;    /* per-root displacements; NULL unless the window flavor provides them */
  const MPI_Datatype *mine,*remote;   /* local/remote composite datatypes, one pair per root rank */
  MPI_Request        *reqs;
  MPI_Win            win;

  PetscFunctionBegin;
  PetscCheck(op == MPI_REPLACE,PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  PetscCall(PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf,unit,&mine,&remote));
  /* Open an exposure epoch on rootdata; the assert flags tell MPI no local puts
     precede or occur during the epoch, enabling optimizations */
  PetscCall(PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win));
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      /* passive-target: take a shared lock per root; MPI_MODE_NOCHECK since no conflicting locks are expected */
      PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win));
#if defined(PETSC_HAVE_MPI_RGET)
      /* request-based get; completed by MPI_Waitall in PetscSFBcastEnd_Window() */
      PetscCallMPI(MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]));
#else
      PetscCallMPI(MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win));
#endif
    } else {
      /* FENCE/ACTIVE: plain get, completed when the epoch is closed at BcastEnd */
      PetscCallMPI(MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win));
    }
  }
  PetscFunctionReturn(0);
}
841 
842 PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
843 {
844   PetscSF_Window *w = (PetscSF_Window*)sf->data;
845   MPI_Win        win;
846   MPI_Request    *reqs = NULL;
847 
848   PetscFunctionBegin;
849   PetscCall(PetscSFFindWindow(sf,unit,rootdata,&win,&reqs));
850   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE));
851   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
852     PetscInt           i,nranks;
853     const PetscMPIInt  *ranks;
854 
855     PetscCall(PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL));
856     for (i=0; i<nranks; i++) {
857       PetscCallMPI(MPI_Win_unlock(ranks[i],win));
858     }
859   }
860   PetscCall(PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win));
861   PetscFunctionReturn(0);
862 }
863 
864 PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
865 {
866   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
867   PetscInt           i,nranks;
868   const PetscMPIInt  *ranks;
869   const MPI_Aint     *target_disp;
870   const MPI_Datatype *mine,*remote;
871   MPI_Win            win;
872 
873   PetscFunctionBegin;
874   PetscCall(PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL));
875   PetscCall(PetscSFWindowGetDataTypes(sf,unit,&mine,&remote));
876   PetscCall(PetscSFWindowOpTranslate(&op));
877   PetscCall(PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win));
878   for (i=0; i<nranks; i++) {
879     MPI_Aint tdp = target_disp ? target_disp[i] : 0;
880 
881     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win));
882     PetscCallMPI(MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win));
883     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i],win));
884   }
885   PetscFunctionReturn(0);
886 }
887 
888 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
889 {
890   PetscSF_Window *w = (PetscSF_Window*)sf->data;
891   MPI_Win        win;
892   MPI_Request    *reqs = NULL;
893 
894   PetscFunctionBegin;
895   PetscCall(PetscSFFindWindow(sf,unit,rootdata,&win,&reqs));
896   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE));
897   PetscCall(PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win));
898   PetscFunctionReturn(0);
899 }
900 
/* Start a fetch-and-op: fetch the current root values into leafupdate and
   accumulate leafdata into the roots with op. With MPI_Get_accumulate this is a
   single atomic call per root; without it, an exclusive lock makes the separate
   Get + Accumulate pair atomic with respect to other origins. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine,*remote;  /* local/remote composite datatypes, one pair per root rank */
  const MPI_Aint     *target_disp;   /* per-root displacements; NULL unless the window flavor provides them */
  MPI_Win            win;
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf;      /* saved flavor, restored after forcing CREATE below */
#endif

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf,unit,&mine,&remote));
  /* translate MPIU_* ops to built-in MPI ops usable inside accumulate operations */
  PetscCall(PetscSFWindowOpTranslate(&op));
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  PetscCall(PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win));
#else
  PetscCall(PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win));
#endif
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* exclusive lock makes the Get + Accumulate pair atomic per target */
    PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win));
    PetscCallMPI(MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win));
    PetscCallMPI(MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win));
    PetscCallMPI(MPI_Win_unlock(ranks[i],win));
#else
    /* atomic fetch-and-accumulate; shared lock only needed for LOCK synchronization */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win));
    PetscCallMPI(MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i],win));
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf; /* restore the user-selected flavor */
#endif
  PetscFunctionReturn(0);
}
945 
/* Complete a fetch-and-op started by PetscSFFetchAndOpBegin_Window(): wait on any
   outstanding requests and release the window with arguments matching the begin call. */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  MPI_Win        win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window*)sf->data; /* only needed for w->sync in this branch */
#endif
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf,unit,rootdata,&win,&reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE));
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscCall(PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win));
#else
  /* the begin call forced a LOCK-synchronized window; restore with matching arguments */
  PetscCall(PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win));
#endif
  PetscFunctionReturn(0);
}
964 
965 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
966 {
967   PetscSF_Window *w = (PetscSF_Window*)sf->data;
968 
969   PetscFunctionBegin;
970   sf->ops->SetUp           = PetscSFSetUp_Window;
971   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
972   sf->ops->Reset           = PetscSFReset_Window;
973   sf->ops->Destroy         = PetscSFDestroy_Window;
974   sf->ops->View            = PetscSFView_Window;
975   sf->ops->Duplicate       = PetscSFDuplicate_Window;
976   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
977   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
978   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
979   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
980   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
981   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
982 
983   PetscCall(PetscNewLog(sf,&w));
984   sf->data  = (void*)w;
985   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
986   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
987   w->info   = MPI_INFO_NULL;
988 
989   PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window));
990   PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window));
991   PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window));
992   PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window));
993   PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window));
994   PetscCall(PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window));
995 
996 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
997   {
998     PetscBool ackbug = PETSC_FALSE;
999     PetscCall(PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL));
1000     if (ackbug) {
1001       PetscCall(PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
1002     } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1003   }
1004 #endif
1005   PetscFunctionReturn(0);
1006 }
1007