xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

/* Implementation data for the MPI one-sided (window) PetscSF backend */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* Auxiliary SF used to broadcast window base addresses for the DYNAMIC flavor */
  MPI_Info                info;   /* Hints passed to window creation; MPI_INFO_NULL unless PetscSFWindowSetInfo() was called */
} PetscSF_Window;
14 
/* Cache node holding the composite MPI datatypes built for one unit type;
   nodes form a singly linked list rooted at PetscSF_Window->link */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* duplicate of the user's unit type, used as the cache key */
  MPI_Datatype   *mine;   /* per-root-rank indexed types addressing the local leaf buffer */
  MPI_Datatype   *remote; /* per-root-rank indexed types addressing the remote root buffer */
  PetscSFDataLink next;   /* next cache entry */
};
21 
/* One MPI window plus bookkeeping; nodes form a singly linked list rooted at PetscSF_Window->wins */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window currently checked out via PetscSFGetWindow() */
  size_t                  bytes;           /* extent in bytes of the unit type the window was sized for */
  void                   *addr;            /* local window memory (user array or MPI-allocated, depending on flavor) */
  void                   *paddr;           /* user array currently associated with the window; lookup key for PetscSFFindWindow() */
  MPI_Win                 win;             /* the MPI window handle */
  MPI_Request            *reqs;            /* per-rank requests, allocated only for PETSCSF_WINDOW_SYNC_LOCK */
  PetscSFWindowFlavorType flavor;          /* flavor the window was created with; must match to be reused */
  MPI_Aint               *dyn_target_addr; /* per-target base addresses, used by the DYNAMIC flavor only */
  PetscBool               epoch;           /* an access/exposure epoch is currently open */
  PetscSFWinLink          next;            /* next window in the list */
};
34 
/* Printable names for the enums; NOTE(review): entry order must match the
   PetscSFWindowSyncType/PetscSFWindowFlavorType enum definitions — confirm against petscsf.h */
const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op) {
40   PetscFunctionBegin;
41   if (*op == MPIU_SUM) *op = MPI_SUM;
42   else if (*op == MPIU_MAX) *op = MPI_MAX;
43   else if (*op == MPIU_MIN) *op = MPI_MIN;
44   PetscFunctionReturn(0);
45 }
46 
/*@C
   PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

   Not Collective

   Input Parameters:
+  sf - star forest
-  unit - data type for each node

   Output Parameters:
+  localtypes - types describing part of local leaf buffer referencing each remote rank
-  remotetypes - types describing part of remote root buffer referenced for each remote rank

   Level: developer

.seealso: `PetscSFSetGraph()`, `PetscSFView()`
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes) {
  PetscSF_Window    *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink    link;
  PetscInt           i, nranks;
  const PetscInt    *roffset, *rmine, *rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link = w->link; link; link = link->next) {
    PetscBool match;
    PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(0);
    }
  }

  /* Create new composite types for each send rank */
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  PetscCall(PetscNew(&link));
  PetscCallMPI(MPI_Type_dup(unit, &link->unit));
  PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
  for (i = 0; i < nranks; i++) {
    PetscInt     rcount = roffset[i + 1] - roffset[i];
    PetscMPIInt *rmine, *rremote; /* NOTE(review): shadows the const PetscInt arrays fetched above (-Wshadow); consider renaming */
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt and PetscMPIInt coincide here, so the SF index arrays can be aliased directly */
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* Downcast 64-bit indices into temporary int arrays, with overflow checking in PetscMPIIntCast() */
    PetscInt j;
    PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
    for (j = 0; j < rcount; j++) {
      PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
      PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
    }
#endif

    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
    PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
#if defined(PETSC_USE_64BIT_INDICES)
    PetscCall(PetscFree2(rmine, rremote));
#endif
    PetscCallMPI(MPI_Type_commit(&link->mine[i]));
    PetscCallMPI(MPI_Type_commit(&link->remote[i]));
  }
  /* Prepend the new entry to the cache */
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(0);
}
118 
/*@C
   PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  flavor - flavor type

   Options Database Key:
.  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)

   Level: advanced

   Notes: Window reuse follows these rules:

     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       for i=1 to K
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         ...
         PetscSFOperationBegin(rootdataN,leafdata_whatever);
         PetscSFOperationEnd(rootdataN,leafdata_whatever);
       endfor
       The following pattern will instead raise an error
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
         PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, flavor, 2);
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  PetscFunctionReturn(0);
}
164 
165 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor) {
166   PetscSF_Window *w = (PetscSF_Window *)sf->data;
167 
168   PetscFunctionBegin;
169   w->flavor = flavor;
170   PetscFunctionReturn(0);
171 }
172 
/*@C
   PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  flavor - flavor type used for MPI_Win creation

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(flavor, 2);
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  PetscFunctionReturn(0);
}
195 
196 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor) {
197   PetscSF_Window *w = (PetscSF_Window *)sf->data;
198 
199   PetscFunctionBegin;
200   *flavor = w->flavor;
201   PetscFunctionReturn(0);
202 }
203 
/*@C
   PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  sync - synchronization type

   Options Database Key:
.  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, sync, 2);
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  PetscFunctionReturn(0);
}
227 
228 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync) {
229   PetscSF_Window *w = (PetscSF_Window *)sf->data;
230 
231   PetscFunctionBegin;
232   w->sync = sync;
233   PetscFunctionReturn(0);
234 }
235 
/*@C
   PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  sync - synchronization type used for window epochs

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(sync, 2);
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  PetscFunctionReturn(0);
}
258 
259 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync) {
260   PetscSF_Window *w = (PetscSF_Window *)sf->data;
261 
262   PetscFunctionBegin;
263   *sync = w->sync;
264   PetscFunctionReturn(0);
265 }
266 
/*@C
   PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  info - MPI_Info handle

   Level: advanced

   Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL, so the caller retains ownership of the passed handle.

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  PetscFunctionReturn(0);
}
288 
/* Type-specific implementation: store a duplicate of the caller's info handle */
static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  /* Release any previously stored hint object first; MPI_Info_free() resets w->info to MPI_INFO_NULL */
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  /* Keep our own duplicate so the caller remains free to modify or free their handle */
  if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
  PetscFunctionReturn(0);
}
297 
/*@C
   PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  info - MPI_Info handle

   Level: advanced

   Notes: if PetscSFWindowSetInfo() has not been called, this returns MPI_INFO_NULL

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(info, 2);
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  PetscFunctionReturn(0);
}
322 
323 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info) {
324   PetscSF_Window *w = (PetscSF_Window *)sf->data;
325 
326   PetscFunctionBegin;
327   *info = w->info;
328   PetscFunctionReturn(0);
329 }
330 
/*
   PetscSFGetWindow - Get a window for use with a given data type

   Collective on PetscSF

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array to be sent
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
-  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE

   Output Parameters:
+  target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
.  reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
-  win - window

   Level: developer
.seealso: `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        lb, lb_true, bytes, bytes_true;
  PetscSFWinLink  link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2];
  MPI_Aint  wsize;

  PetscFunctionBegin;
  /* Only simple unit types are supported: zero lower bound and extent equal to true extent */
  PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
  PetscCheck(lb == 0 && lb_true == 0, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  PetscCheck(bytes == bytes_true, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* All flavors except CREATE may reuse an existing window from the list */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link = w->wins; reuse && link; link = link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* dummy[0] ANDs and dummy[1] ORs the per-process match result; they agree iff all processes matched consistently */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
          PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
          PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        PetscCheck(!link->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window in use");
        PetscCheck(!epoch || !link->epoch, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window epoch not finished");
        winok       = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* This process did not match; still participate in the collective consistency check */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
        PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
        PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update      = PETSC_TRUE; /* MPI owns the memory, so the root data must be copied in below */
        link->paddr = array;
        winok       = PETSC_TRUE;
      }
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscCall(PetscInfo(sf, "Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
      goto found;
    }
  }

  /* No reusable window: create a new one according to the current flavor */
  wsize = (MPI_Aint)bytes * sf->nroots;
  PetscCall(PetscNew(&link));
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    PetscCallMPI(MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win));
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC: PetscCallMPI(MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win));
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    PetscCallMPI(MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize));
#else
    PetscCallMPI(MPI_Win_attach(link->win, array, wsize));
#endif
    link->addr  = array;
    link->paddr = array;
    PetscCheck(w->dynsf, PetscObjectComm((PetscObject)sf), PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
    PetscCall(PetscSFSetUp(w->dynsf));
    /* Broadcast each root's window base address to its leaves so RMA calls can compute target displacements */
    PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
    PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
    PetscCallMPI(MPI_Get_address(array, &winaddr));
    PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE; /* MPI allocated the memory, so the root data must be copied in below */
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscCall(PetscInfo(sf, "New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
    PetscMPIInt rank;

    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
    PetscCall(PetscMemcpy(link->addr, array, sf->nroots * bytes));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_unlock(rank, *win));
      PetscCallMPI(MPI_Win_fence(0, *win));
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE: PetscCallMPI(MPI_Win_fence(fenceassert, *win)); break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
      if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
513 
/*
   PetscSFFindWindow - Finds a window that is already in use

   Not Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  array - array with which the window is associated

   Output Parameters:
+  win - window
-  reqs - outstanding requests associated to the window

   Notes: matches on the user array pointer (link->paddr); errors if no in-use
   window is associated with the array. *reqs is only written on success.

   Level: developer

.seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
*/
static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink  link;

  PetscFunctionBegin;
  *win = MPI_WIN_NULL;
  for (link = w->wins; link; link = link->next) {
    if (array == link->paddr) {
      PetscCall(PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
      *win  = link->win;
      *reqs = link->reqs;
      PetscFunctionReturn(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
}
548 
/*
   PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array associated with window
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - close an epoch, must match argument to PetscSFGetWindow()
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  update - if we have to update the local window array
-  win - window

   Level: developer

.seealso: `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win) {
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Walk with a pointer-to-pointer so a CREATE-flavor link can be unlinked in place */
  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      PetscCheck(array == link->paddr, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
      if (epoch != link->epoch) {
        PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      }
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        /* CREATE windows are never reused: unlink now and free below; the window
           memory is the user's array, so there is nothing to copy back */
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscCall(PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE: PetscCallMPI(MPI_Win_fence(fenceassert, *win)); break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */ break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (osize) PetscCallMPI(MPI_Win_complete(*win));
      if (isize) PetscCallMPI(MPI_Win_wait(*win));
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  if (update) {
    /* Copy MPI-owned window memory back into the user's root array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
    PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    PetscCall(PetscFree(link->dyn_target_addr));
    PetscCall(PetscFree(link->reqs));
    PetscCallMPI(MPI_Win_free(&link->win));
    PetscCall(PetscFree(link));
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(0);
}
639 
/* Set up the window SF: establish root ranks and build the auxiliary SF used by the DYNAMIC flavor */
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
  if (!w->dynsf) {
    PetscInt     i;
    PetscSFNode *remotes;

    /* One leaf per root rank, all pointing at index 0 of that rank: used to broadcast window base addresses */
    PetscCall(PetscMalloc1(sf->nranks, &remotes));
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf));
    PetscCall(PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
    PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
    PetscCall(PetscLogObjectParent((PetscObject)sf, (PetscObject)w->dynsf));
  }
  switch (w->sync) {
  /* ACTIVE sync needs the in/out groups; compute them eagerly here (falls through to default) */
  case PETSCSF_WINDOW_SYNC_ACTIVE: PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
  default: break;
  }
  PetscFunctionReturn(0);
}
666 
/* Process -sf_window_sync and -sf_window_flavor options for the window SF type */
static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject) {
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
  /* The flavor goes through the setter (not written directly) so the type-specific method runs */
  PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
  PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
679 
/* Release all cached datatypes, all windows, the dynamic-flavor SF, and the info hints */
static PetscErrorCode PetscSFReset_Window(PetscSF sf) {
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the datatype cache: the duplicated unit plus the per-rank composite types */
  for (link = w->link; link; link = next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i = 0; i < sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine, link->remote));
    PetscCall(PetscFree(link));
  }
  w->link = NULL;
  /* Free all windows; it is an error to reset while a window is still checked out */
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;
    PetscCheck(!wlink->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
    PetscCall(PetscFree(wlink->dyn_target_addr));
    PetscCall(PetscFree(wlink->reqs));
    PetscCallMPI(MPI_Win_free(&wlink->win));
    PetscCall(PetscFree(wlink));
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  PetscFunctionReturn(0);
}
711 
/* Destroy the window SF: reset state, free implementation data, and unregister composed methods */
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf) {
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Window(sf));
  PetscCall(PetscFree(sf->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
  PetscFunctionReturn(0);
}
724 
/* ASCII viewer for the window SF: prints flavor, sync type, and (at detail level) the MPI_Info key/value pairs */
static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer) {
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
          PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
          PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
          PetscCall(PetscViewerASCIIPrintf(viewer, "      %s = %s\n", key, value));
        }
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info=MPI_INFO_NULL\n"));
      }
    }
  }
  PetscFunctionReturn(0);
}
757 
758 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf) {
759   PetscSF_Window       *w = (PetscSF_Window *)sf->data;
760   PetscSFWindowSyncType synctype;
761 
762   PetscFunctionBegin;
763   synctype = w->sync;
764   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
765   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
766   PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
767   PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
768   PetscCall(PetscSFWindowSetInfo(newsf, w->info));
769   PetscFunctionReturn(0);
770 }
771 
/* Begin a root-to-leaf broadcast: each leaf rank issues a one-sided MPI_Get against the
   window exposing rootdata on its root ranks. Only op == MPI_REPLACE is supported.
   The exposure epoch opened here is closed by PetscSFBcastEnd_Window(). */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) {
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;  /* root ranks this process gets from */
  const MPI_Aint     *target_disp; /* per-rank displacements (NULL unless the DYNAMIC flavor supplies them) */
  const MPI_Datatype *mine, *remote; /* local (leaf) and remote (root) composite types, one per root rank */
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* open the epoch; NOPUT/NOPRECEDE assertions tell MPI no local stores or preceding RMA are pending */
  PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      /* per-target shared lock; NOCHECK because no conflicting epoch can exist here */
      PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
#if defined(PETSC_HAVE_MPI_RGET)
      /* request-based get: completion is waited on in BcastEnd via reqs[] */
      PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
#else
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
#endif
    } else {
      /* FENCE/ACTIVE sync: plain get, completed by the closing synchronization call in BcastEnd */
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    }
  }
  PetscFunctionReturn(0);
}
802 
803 PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) {
804   PetscSF_Window *w = (PetscSF_Window *)sf->data;
805   MPI_Win         win;
806   MPI_Request    *reqs = NULL;
807 
808   PetscFunctionBegin;
809   PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
810   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
811   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
812     PetscInt           i, nranks;
813     const PetscMPIInt *ranks;
814 
815     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
816     for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
817   }
818   PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
819   PetscFunctionReturn(0);
820 }
821 
822 PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) {
823   PetscSF_Window     *w = (PetscSF_Window *)sf->data;
824   PetscInt            i, nranks;
825   const PetscMPIInt  *ranks;
826   const MPI_Aint     *target_disp;
827   const MPI_Datatype *mine, *remote;
828   MPI_Win             win;
829 
830   PetscFunctionBegin;
831   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
832   PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
833   PetscCall(PetscSFWindowOpTranslate(&op));
834   PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
835   for (i = 0; i < nranks; i++) {
836     MPI_Aint tdp = target_disp ? target_disp[i] : 0;
837 
838     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
839     PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
840     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) {
846   PetscSF_Window *w = (PetscSF_Window *)sf->data;
847   MPI_Win         win;
848   MPI_Request    *reqs = NULL;
849 
850   PetscFunctionBegin;
851   PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
852   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
853   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
854   PetscFunctionReturn(0);
855 }
856 
/* Begin fetch-and-op: atomically read the current root values into leafupdate and
   accumulate leafdata into the roots with op. Uses MPI_Get_accumulate when available;
   otherwise emulates it with an exclusive lock around a Get followed by an Accumulate. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;  /* root ranks this process targets */
  const MPI_Datatype *mine, *remote; /* local (leaf) and remote (root) composite types per rank */
  const MPI_Aint     *target_disp; /* per-rank displacements (NULL unless supplied by the window flavor) */
  MPI_Win             win;
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf; /* saved flavor, restored after forcing CREATE below */
#endif

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* MPIU_* ops cannot be used with composite types in RMA accumulate calls; map to built-ins */
  PetscCall(PetscSFWindowOpTranslate(&op));
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf      = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win));
#else
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
#endif
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* exclusive lock makes the Get+Accumulate pair atomic with respect to other processes */
    PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win));
    PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#else
    /* MPI_Get_accumulate is itself atomic; a shared lock suffices for LOCK sync */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win));
    PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  PetscFunctionReturn(0);
}
900 
901 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
902   MPI_Win win;
903 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
904   PetscSF_Window *w = (PetscSF_Window *)sf->data;
905 #endif
906   MPI_Request *reqs = NULL;
907 
908   PetscFunctionBegin;
909   PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
910   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
911 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
912   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
913 #else
914   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win));
915 #endif
916   PetscFunctionReturn(0);
917 }
918 
919 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf) {
920   PetscSF_Window *w = (PetscSF_Window *)sf->data;
921 
922   PetscFunctionBegin;
923   sf->ops->SetUp           = PetscSFSetUp_Window;
924   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
925   sf->ops->Reset           = PetscSFReset_Window;
926   sf->ops->Destroy         = PetscSFDestroy_Window;
927   sf->ops->View            = PetscSFView_Window;
928   sf->ops->Duplicate       = PetscSFDuplicate_Window;
929   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
930   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
931   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
932   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
933   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
934   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
935 
936   PetscCall(PetscNewLog(sf, &w));
937   sf->data  = (void *)w;
938   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
939   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
940   w->info   = MPI_INFO_NULL;
941 
942   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window));
943   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window));
944   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window));
945   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window));
946   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window));
947   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window));
948 
949 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
950   {
951     PetscBool ackbug = PETSC_FALSE;
952     PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL));
953     if (ackbug) {
954       PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
955     } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
956   }
957 #endif
958   PetscFunctionReturn(0);
959 }
960