xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision 750b007cd8d816cecd9de99077bb0a703b4cf61a)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

/* Implementation context for the window (MPI one-sided) PetscSF type; stored in sf->data */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* Helper SF used to broadcast dynamic-window target addresses (see PetscSFSetUp_Window()) */
  MPI_Info                info;   /* Info handle passed to window creation; MPI_INFO_NULL unless set via PetscSFWindowSetInfo() */
} PetscSF_Window;
14 
/* Cache node holding per-rank composite MPI datatypes derived from one unit type
   (built and cached by PetscSFWindowGetDataTypes(), freed in PetscSFReset_Window()) */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* dup of the user's unit type; used as the cache key */
  MPI_Datatype   *mine;   /* per-rank indexed types into the local leaf buffer */
  MPI_Datatype   *remote; /* per-rank indexed types into the remote root buffer */
  PetscSFDataLink next;   /* next entry in the cache list */
};
21 
/* One entry per MPI window created by PetscSFGetWindow(); entries are kept in a
   list so windows can be reused according to the flavor's reuse policy */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window is currently checked out by an operation */
  size_t                  bytes;           /* extent of one unit; window size is bytes * sf->nroots */
  void                   *addr;            /* memory exposed through the window (user array or MPI-allocated) */
  void                   *paddr;           /* user array currently associated with the window (NULL when restored) */
  MPI_Win                 win;             /* the MPI window handle */
  MPI_Request            *reqs;            /* per-rank requests, allocated only for PETSCSF_WINDOW_SYNC_LOCK */
  PetscSFWindowFlavorType flavor;          /* flavor the window was created with */
  MPI_Aint               *dyn_target_addr; /* per-root-rank target displacements, PETSCSF_WINDOW_FLAVOR_DYNAMIC only */
  PetscBool               epoch;           /* an access/exposure epoch is currently open on this window */
  PetscSFWinLink          next;            /* next window in the list */
};
34 
/* Enum value names plus trailing type-name/prefix/NULL entries in the layout expected by PetscOptionsEnum() (used in PetscSFSetFromOptions_Window()) */
const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op) {
40   PetscFunctionBegin;
41   if (*op == MPIU_SUM) *op = MPI_SUM;
42   else if (*op == MPIU_MAX) *op = MPI_MAX;
43   else if (*op == MPIU_MIN) *op = MPI_MIN;
44   PetscFunctionReturn(0);
45 }
46 
/*@C
   PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

   Not Collective

   Input Parameters:
+  sf - star forest
-  unit - data type for each node

   Output Parameters:
+  localtypes - types describing part of local leaf buffer referencing each remote rank
-  remotetypes - types describing part of remote root buffer referenced for each remote rank

   Level: developer

.seealso: `PetscSFSetGraph()`, `PetscSFView()`
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes) {
  PetscSF_Window    *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink    link;
  PetscInt           i, nranks;
  const PetscInt    *roffset, *rmine, *rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache: reuse the per-rank types built earlier for an equivalent unit type */
  for (link = w->link; link; link = link->next) {
    PetscBool match;
    PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(0);
    }
  }

  /* Create new composite types for each send rank */
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  PetscCall(PetscNew(&link));
  PetscCallMPI(MPI_Type_dup(unit, &link->unit));
  PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
  for (i = 0; i < nranks; i++) {
    PetscInt     rcount = roffset[i + 1] - roffset[i];
    /* NOTE(review): these intentionally shadow the outer rmine/rremote obtained from PetscSFGetRootRanks() above */
    PetscMPIInt *rmine, *rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt and PetscMPIInt have the same width here, so the SF's index arrays can be used in place */
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* 64-bit PetscInt: copy indices into temporary PetscMPIInt arrays, checking each cast for overflow */
    PetscInt j;
    PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
    for (j = 0; j < rcount; j++) {
      PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
      PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
    }
#endif

    /* One indexed block type per rank: leaf-side (mine) and root-side (remote) selections of unit entries */
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
#if defined(PETSC_USE_64BIT_INDICES)
    PetscCall(PetscFree2(rmine, rremote));
#endif
    PetscCallMPI(MPI_Type_commit(&link->mine[i]));
    PetscCallMPI(MPI_Type_commit(&link->remote[i]));
  }
  /* Prepend to the cache so future calls with the same unit hit the fast path */
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(0);
}
118 
/*@C
   PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  flavor - flavor type

   Options Database Key:
.  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)

   Level: advanced

   Notes: Window reuse follows these rules:

     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       for i=1 to K
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         ...
         PetscSFOperationBegin(rootdataN,leafdata_whatever);
         PetscSFOperationEnd(rootdataN,leafdata_whatever);
       endfor
       The following pattern will instead raise an error
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
         PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, flavor, 2);
  /* Dispatch to the type-specific implementation if composed; no-op for other SF types */
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  PetscFunctionReturn(0);
}
164 
/* Type-specific implementation backing PetscSFWindowSetFlavorType(); stores the flavor in the window context */
static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->flavor = flavor;
  PetscFunctionReturn(0);
}
172 
/*@C
   PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  flavor - flavor type

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(flavor, 2);
  /* Errors (rather than no-ops) if the SF type does not provide this query */
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  PetscFunctionReturn(0);
}
195 
/* Type-specific implementation backing PetscSFWindowGetFlavorType() */
static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *flavor = w->flavor;
  PetscFunctionReturn(0);
}
203 
/*@C
   PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  sync - synchronization type

   Options Database Key:
.  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, sync, 2);
  /* Dispatch to the type-specific implementation if composed; no-op for other SF types */
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  PetscFunctionReturn(0);
}
227 
/* Type-specific implementation backing PetscSFWindowSetSyncType(); stores the sync type in the window context */
static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->sync = sync;
  PetscFunctionReturn(0);
}
235 
/*@C
   PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  sync - synchronization type

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(sync, 2);
  /* Errors (rather than no-ops) if the SF type does not provide this query */
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  PetscFunctionReturn(0);
}
258 
/* Type-specific implementation backing PetscSFWindowGetSyncType() */
static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *sync = w->sync;
  PetscFunctionReturn(0);
}
266 
/*@C
   PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  info - MPI_Info handle

   Level: advanced

   Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Dispatch to the type-specific implementation if composed; no-op for other SF types */
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  PetscFunctionReturn(0);
}
288 
/* Type-specific implementation backing PetscSFWindowSetInfo(): releases any previously
   dup'ed info, then stores a dup of the new handle (or leaves MPI_INFO_NULL) */
static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  /* MPI_Info_free also resets w->info to MPI_INFO_NULL */
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
  PetscFunctionReturn(0);
}
297 
/*@C
   PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  info - MPI_Info handle

   Level: advanced

   Notes: if PetscSFWindowSetInfo() has not been called, this returns MPI_INFO_NULL

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(info, 2);
  /* Errors (rather than no-ops) if the SF type does not provide this query */
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  PetscFunctionReturn(0);
}
322 
/* Type-specific implementation backing PetscSFWindowGetInfo(); returns the stored (possibly MPI_INFO_NULL) handle without duplicating it */
static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *info = w->info;
  PetscFunctionReturn(0);
}
330 
/*
   PetscSFGetWindow - Get a window for use with a given data type

   Collective on PetscSF

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array to be sent
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
-  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE

   Output Parameters:
+  target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
.  reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
-  win - window

   Level: developer
.seealso: `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        lb, lb_true, bytes, bytes_true;
  PetscSFWinLink  link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2];
  MPI_Aint  wsize;

  PetscFunctionBegin;
  /* Only unit types with zero lower bound and unmodified extent are supported */
  PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
  PetscCheck(lb == 0 && lb_true == 0, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  PetscCheck(bytes == bytes_true, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* CREATE always makes a fresh window; every other flavor may reuse a cached one */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link = w->wins; reuse && link; link = link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* Debug-only consistency check: all ranks must agree on whether the root pointer matched */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
          PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
          PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        PetscCheck(!link->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window in use");
        PetscCheck(!epoch || !link->epoch, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window epoch not finished");
        winok       = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* This rank did not match: the same two reductions must still be entered collectively */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
        PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
        PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update      = PETSC_TRUE; /* the MPI-allocated buffer must be refreshed from the user array below */
        link->paddr = array;
        winok       = PETSC_TRUE;
      }
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscCall(PetscInfo(sf, "Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
      goto found;
    }
  }

  /* No reusable window: create a new link and window of the requested flavor */
  wsize = (MPI_Aint)bytes * sf->nroots;
  PetscCall(PetscNew(&link));
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    PetscCallMPI(MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win));
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC: PetscCallMPI(MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win));
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    PetscCallMPI(MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize));
#else
    PetscCallMPI(MPI_Win_attach(link->win, array, wsize));
#endif
    link->addr  = array;
    link->paddr = array;
    PetscCheck(w->dynsf, PetscObjectComm((PetscObject)sf), PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
    /* Broadcast each rank's attached base address to the ranks that will target it in RMA calls */
    PetscCall(PetscSFSetUp(w->dynsf));
    PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
    PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
    PetscCallMPI(MPI_Get_address(array, &winaddr));
    PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE; /* MPI allocated the buffer, so it must be filled from the user array below */
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscCall(PetscInfo(sf, "New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guarantees memory-synchronization */
    PetscMPIInt rank;

    /* Copy the user array into the MPI-allocated window memory before opening the epoch */
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
    PetscCall(PetscMemcpy(link->addr, array, sf->nroots * bytes));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_unlock(rank, *win));
      PetscCallMPI(MPI_Win_fence(0, *win));
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE: PetscCallMPI(MPI_Win_fence(fenceassert, *win)); break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
      if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
513 
514 /*
515    PetscSFFindWindow - Finds a window that is already in use
516 
517    Not Collective
518 
519    Input Parameters:
520 +  sf - star forest
521 .  unit - data type
522 -  array - array with which the window is associated
523 
524    Output Parameters:
525 +  win - window
526 -  reqs - outstanding requests associated to the window
527 
528    Level: developer
529 
530 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
531 */
532 static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs) {
533   PetscSF_Window *w = (PetscSF_Window *)sf->data;
534   PetscSFWinLink  link;
535 
536   PetscFunctionBegin;
537   *win = MPI_WIN_NULL;
538   for (link = w->wins; link; link = link->next) {
539     if (array == link->paddr) {
540       PetscCall(PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
541       *win  = link->win;
542       *reqs = link->reqs;
543       PetscFunctionReturn(0);
544     }
545   }
546   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
547 }
548 
/*
   PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array associated with window
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - close an epoch, must match argument to PetscSFGetWindow()
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  update - if we have to update the local window array
-  win - window

   Level: developer

.seealso: `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win) {
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Locate the link by window handle; p tracks the previous "next" pointer so CREATE links can be unspliced */
  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      PetscCheck(array == link->paddr, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
      if (epoch != link->epoch) {
        PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      }
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      /* CREATE windows are never reused: remove from the list and skip the copy-back below */
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscCall(PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE: PetscCallMPI(MPI_Win_fence(fenceassert, *win)); break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (osize) PetscCallMPI(MPI_Win_complete(*win));
      if (isize) PetscCallMPI(MPI_Win_wait(*win));
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  if (update) {
    /* Copy the (possibly remotely modified) window memory back into the user array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
    PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    /* Non-reusable (CREATE) window: destroy it and its auxiliary data */
    PetscCall(PetscFree(link->dyn_target_addr));
    PetscCall(PetscFree(link->reqs));
    PetscCallMPI(MPI_Win_free(&link->win));
    PetscCall(PetscFree(link));
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(0);
}
639 
/* Set up the window SF: compute root ranks and lazily create the helper SF used to
   broadcast dynamic-window target addresses (one root per process, leaves on the root ranks) */
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
  if (!w->dynsf) {
    PetscInt     i;
    PetscSFNode *remotes;

    /* One leaf per root rank, each pointing at index 0 of that rank's single root */
    PetscCall(PetscMalloc1(sf->nranks, &remotes));
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf));
    PetscCall(PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
    PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
  }
  /* For ACTIVE sync, fetch the in/out groups during setup (return values unused here);
     NOTE(review): presumably this primes the group computation so later epoch calls avoid it — confirm.
     The ACTIVE case intentionally falls through to default. */
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE: PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
  default: break;
  }
  PetscFunctionReturn(0);
}
665 
/* Process -sf_window_sync and -sf_window_flavor options for the window SF type */
static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject) {
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  /* Sync type is written directly into the context; flavor goes through the setter API below */
  PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
  PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
  PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
678 
/* Release all cached datatypes, windows, the helper SF, and the info handle */
static PetscErrorCode PetscSFReset_Window(PetscSF sf) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the composite datatype cache built by PetscSFWindowGetDataTypes() */
  for (link = w->link; link; link = next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i = 0; i < sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine, link->remote));
    PetscCall(PetscFree(link));
  }
  w->link = NULL;
  /* Free cached windows; it is an error to reset while any window is still checked out */
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;
    PetscCheck(!wlink->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
    PetscCall(PetscFree(wlink->dyn_target_addr));
    PetscCall(PetscFree(wlink->reqs));
    PetscCallMPI(MPI_Win_free(&wlink->win));
    PetscCall(PetscFree(wlink));
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  PetscFunctionReturn(0);
}
710 
711 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf) {
712   PetscFunctionBegin;
713   PetscCall(PetscSFReset_Window(sf));
714   PetscCall(PetscFree(sf->data));
715   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
716   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
717   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
718   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
719   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
720   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
721   PetscFunctionReturn(0);
722 }
723 
/* ASCII view of the window SF: flavor, sync type, and (in detail format) the MPI_Info key-value pairs */
static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer) {
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        /* Dump every key-value pair stored in the info handle */
        PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
          PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
          PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
          PetscCall(PetscViewerASCIIPrintf(viewer, "      %s = %s\n", key, value));
        }
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info=MPI_INFO_NULL\n"));
      }
    }
  }
  PetscFunctionReturn(0);
}
756 
757 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf) {
758   PetscSF_Window       *w = (PetscSF_Window *)sf->data;
759   PetscSFWindowSyncType synctype;
760 
761   PetscFunctionBegin;
762   synctype = w->sync;
763   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
764   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
765   PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
766   PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
767   PetscCall(PetscSFWindowSetInfo(newsf, w->info));
768   PetscFunctionReturn(0);
769 }
770 
/* Begin a broadcast (roots -> leaves): each process issues one-sided MPI_Get (or
   MPI_Rget) calls pulling root data from every root rank's window into leafdata.
   Only op == MPI_REPLACE is supported. The access epoch opened here is completed
   in PetscSFBcastEnd_Window(). rootmtype/leafmtype are ignored by this implementation. */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) {
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Expose rootdata through a window; assertions promise no local stores (NOPUT)
     and no RMA epoch preceding this one (NOPRECEDE) */
  PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
  for (i = 0; i < nranks; i++) {
    /* NOTE(review): target_disp appears to be non-NULL only when per-rank window
       offsets differ (presumably the DYNAMIC flavor) -- confirm in PetscSFGetWindow() */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
#if defined(PETSC_HAVE_MPI_RGET)
      /* Request-based get: the request is waited on in PetscSFBcastEnd_Window() before unlocking */
      PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
#else
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
#endif
    } else {
      /* FENCE/ACTIVE synchronization: plain get, completed when the epoch closes in BcastEnd */
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    }
  }
  PetscFunctionReturn(0);
}
801 
/* Complete the broadcast begun in PetscSFBcastBegin_Window(): wait on outstanding
   request-based gets (if any), release per-rank locks under LOCK synchronization,
   and return the window. leafdata and op are unused here. */
PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  /* reqs is non-NULL when BcastBegin used MPI_Rget; one request per root rank */
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt           i, nranks;
    const PetscMPIInt *ranks;

    PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
    for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  /* NOTE(review): the trailing PETSC_FALSE presumably marks the window contents as
     unmodified (a broadcast only reads roots) -- confirm against PetscSFRestoreWindow() */
  PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
  PetscFunctionReturn(0);
}
820 
/* Begin a reduction (leaves -> roots): accumulate local leafdata into each root
   rank's exposed rootdata window with MPI_Accumulate. MPIU_ composite ops are first
   translated to built-in MPI ops (see PetscSFWindowOpTranslate), since built-ins act
   elementwise inside MPI_Accumulate. leafmtype/rootmtype are ignored here. */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) {
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  PetscCall(PetscSFWindowOpTranslate(&op));
  /* NOPRECEDE: no RMA epoch precedes this one on the window */
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
  for (i = 0; i < nranks; i++) {
    /* NOTE(review): target_disp seems to be non-NULL only when per-rank window
       offsets differ -- confirm in PetscSFGetWindow() */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    /* LOCK synchronization brackets each accumulate with a shared lock/unlock;
       FENCE/ACTIVE rely on epoch-wide synchronization instead */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  PetscFunctionReturn(0);
}
843 
/* Complete the reduction begun in PetscSFReduceBegin_Window(): wait on any pending
   requests associated with the window, then return it. leafdata and op are unused. */
static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  /* NOTE(review): the trailing PETSC_TRUE presumably means the window contents were
     updated and must be written back to rootdata -- confirm against PetscSFRestoreWindow() */
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
  PetscFunctionReturn(0);
}
855 
/* Begin a fetch-and-op: atomically fetch the current root values into leafupdate and
   accumulate leafdata into the roots. With MPI_Get_accumulate available, a single
   atomic call per rank is used; otherwise the get+accumulate pair is made atomic by
   an exclusive lock, which forces a freshly created LOCK-synchronized window. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine, *remote;
  const MPI_Aint     *target_disp;
  MPI_Win             win;
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf; /* saved flavor, restored after forcing CREATE below */
#endif

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Translate MPIU_ composite ops to built-in MPI ops usable in accumulate calls */
  PetscCall(PetscSFWindowOpTranslate(&op));
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf      = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win));
#else
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
#endif
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* Exclusive lock makes the read-then-accumulate pair atomic with respect to
       other processes targeting the same rank */
    PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win));
    PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#else
    /* MPI_Get_accumulate is itself atomic per target, so a shared lock suffices */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win));
    PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  PetscFunctionReturn(0);
}
899 
/* Complete the fetch-and-op begun in PetscSFFetchAndOpBegin_Window(): wait on any
   pending requests and return the window. The restore arguments mirror how the
   window was acquired in Begin (user sync with MPI_Get_accumulate, forced LOCK
   sync otherwise). leafdata, leafupdate, and op are unused here. */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
  MPI_Win win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window *)sf->data; /* only needed to read w->sync */
#endif
  MPI_Request *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
#else
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win));
#endif
  PetscFunctionReturn(0);
}
917 
918 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf) {
919   PetscSF_Window *w = (PetscSF_Window *)sf->data;
920 
921   PetscFunctionBegin;
922   sf->ops->SetUp           = PetscSFSetUp_Window;
923   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
924   sf->ops->Reset           = PetscSFReset_Window;
925   sf->ops->Destroy         = PetscSFDestroy_Window;
926   sf->ops->View            = PetscSFView_Window;
927   sf->ops->Duplicate       = PetscSFDuplicate_Window;
928   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
929   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
930   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
931   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
932   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
933   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
934 
935   PetscCall(PetscNew(&w));
936   sf->data  = (void *)w;
937   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
938   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
939   w->info   = MPI_INFO_NULL;
940 
941   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window));
942   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window));
943   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window));
944   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window));
945   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window));
946   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window));
947 
948 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
949   {
950     PetscBool ackbug = PETSC_FALSE;
951     PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL));
952     if (ackbug) {
953       PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
954     } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
955   }
956 #endif
957   PetscFunctionReturn(0);
958 }
959