xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision 89928cc5142e867cb7b0dd1ff74e0acffcd6b4b5)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

/* Implementation data for the window (one-sided MPI) PetscSF type */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* Helper SF used to broadcast window base addresses (DYNAMIC flavor) */
  MPI_Info                info;   /* Info handle passed to window creation; MPI_INFO_NULL when unset */
} PetscSF_Window;

/* Cache node pairing a unit datatype with the per-rank composite types built from it */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* Duplicated unit type this entry was built for */
  MPI_Datatype   *mine;   /* Per-rank indexed types into the local leaf buffer */
  MPI_Datatype   *remote; /* Per-rank indexed types into the remote root buffer */
  PetscSFDataLink next;   /* Next cache entry */
};

/* Node in the list of windows managed by this SF */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* Window currently checked out by an operation */
  size_t                  bytes;           /* Extent of the unit type the window was created for */
  void                   *addr;            /* Memory exposed through the window */
  void                   *paddr;           /* User array currently associated with the window */
  MPI_Win                 win;             /* The MPI window handle */
  MPI_Request            *reqs;            /* Per-rank requests (LOCK synchronization only) */
  PetscSFWindowFlavorType flavor;          /* Flavor the window was created with */
  MPI_Aint               *dyn_target_addr; /* Per-rank target displacements (DYNAMIC flavor only) */
  PetscBool               epoch;           /* An access epoch is currently open on this window */
  PetscSFWinLink          next;            /* Next window in the list */
};
34 
/* String tables for the enums above; consumed by PetscOptionsEnum() and the ASCII viewer below */
const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40 {
41   PetscFunctionBegin;
42   if (*op == MPIU_SUM) *op = MPI_SUM;
43   else if (*op == MPIU_MAX) *op = MPI_MAX;
44   else if (*op == MPIU_MIN) *op = MPI_MIN;
45   PetscFunctionReturn(0);
46 }
47 
48 /*@C
49    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
50 
51    Not Collective
52 
53    Input Parameters:
54 +  sf - star forest
55 -  unit - data type for each node
56 
57    Output Parameters:
58 +  localtypes - types describing part of local leaf buffer referencing each remote rank
59 -  remotetypes - types describing part of remote root buffer referenced for each remote rank
60 
61    Level: developer
62 
63 .seealso: `PetscSFSetGraph()`, `PetscSFView()`
64 @*/
65 static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
66 {
67   PetscSF_Window    *w = (PetscSF_Window *)sf->data;
68   PetscSFDataLink    link;
69   PetscInt           i, nranks;
70   const PetscInt    *roffset, *rmine, *rremote;
71   const PetscMPIInt *ranks;
72 
73   PetscFunctionBegin;
74   /* Look for types in cache */
75   for (link = w->link; link; link = link->next) {
76     PetscBool match;
77     PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
78     if (match) {
79       *localtypes  = link->mine;
80       *remotetypes = link->remote;
81       PetscFunctionReturn(0);
82     }
83   }
84 
85   /* Create new composite types for each send rank */
86   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
87   PetscCall(PetscNew(&link));
88   PetscCallMPI(MPI_Type_dup(unit, &link->unit));
89   PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
90   for (i = 0; i < nranks; i++) {
91     PetscInt     rcount = roffset[i + 1] - roffset[i];
92     PetscMPIInt *rmine, *rremote;
93 #if !defined(PETSC_USE_64BIT_INDICES)
94     rmine   = sf->rmine + sf->roffset[i];
95     rremote = sf->rremote + sf->roffset[i];
96 #else
97     PetscInt j;
98     PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
99     for (j = 0; j < rcount; j++) {
100       PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
101       PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
102     }
103 #endif
104 
105     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
106     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
107 #if defined(PETSC_USE_64BIT_INDICES)
108     PetscCall(PetscFree2(rmine, rremote));
109 #endif
110     PetscCallMPI(MPI_Type_commit(&link->mine[i]));
111     PetscCallMPI(MPI_Type_commit(&link->remote[i]));
112   }
113   link->next = w->link;
114   w->link    = link;
115 
116   *localtypes  = link->mine;
117   *remotetypes = link->remote;
118   PetscFunctionReturn(0);
119 }
120 
/*@C
   PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  flavor - flavor type

   Options Database Key:
.  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)

   Level: advanced

   Notes: Window reuse follows these rules:

     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       for i=1 to K
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         ...
         PetscSFOperationBegin(rootdataN,leafdata_whatever);
         PetscSFOperationEnd(rootdataN,leafdata_whatever);
       endfor
       The following pattern will instead raise an error
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
         PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, flavor, 2);
  /* No-op for non-window SF types (TryMethod dispatches only if the method is composed) */
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  PetscFunctionReturn(0);
}
167 
/* Type-specific implementation: record the flavor used for subsequent window creations */
static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->flavor = flavor;
  PetscFunctionReturn(0);
}
176 
/*@C
   PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  flavor - flavor type

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(flavor, 2);
  /* Errors if the SF type does not compose this method (getter must be answerable) */
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  PetscFunctionReturn(0);
}
200 
/* Type-specific implementation: report the currently configured flavor */
static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *flavor = w->flavor;
  PetscFunctionReturn(0);
}
209 
/*@C
   PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  sync - synchronization type

   Options Database Key:
.  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, sync, 2);
  /* No-op for non-window SF types (TryMethod dispatches only if the method is composed) */
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  PetscFunctionReturn(0);
}
234 
/* Type-specific implementation: record the synchronization strategy for subsequent epochs */
static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->sync = sync;
  PetscFunctionReturn(0);
}
243 
/*@C
   PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  sync - synchronization type

   Level: advanced

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(sync, 2);
  /* Errors if the SF type does not compose this method (getter must be answerable) */
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  PetscFunctionReturn(0);
}
267 
/* Type-specific implementation: report the currently configured synchronization type */
static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *sync = w->sync;
  PetscFunctionReturn(0);
}
276 
/*@C
   PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation

   Logically Collective

   Input Parameters:
+  sf - star forest for communication
-  info - MPI_Info handle

   Level: advanced

   Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* No-op for non-window SF types (TryMethod dispatches only if the method is composed) */
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  PetscFunctionReturn(0);
}
299 
/* Type-specific implementation: replace the stored info handle with a duplicate of the given one.
   MPI_Info_free resets w->info to MPI_INFO_NULL, so passing MPI_INFO_NULL simply clears the setting. */
static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
  PetscFunctionReturn(0);
}
309 
/*@C
   PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation

   Logically Collective

   Input Parameter:
.  sf - star forest for communication

   Output Parameter:
.  info - MPI_Info handle

   Level: advanced

   Notes: if PetscSFWindowSetInfo() has not been called, this returns MPI_INFO_NULL

.seealso: `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(info, 2);
  /* Errors if the SF type does not compose this method (getter must be answerable) */
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  PetscFunctionReturn(0);
}
335 
/* Type-specific implementation: return the stored info handle (not a duplicate; caller must not free it) */
static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *info = w->info;
  PetscFunctionReturn(0);
}
344 
345 /*
346    PetscSFGetWindow - Get a window for use with a given data type
347 
348    Collective on PetscSF
349 
350    Input Parameters:
351 +  sf - star forest
352 .  unit - data type
353 .  array - array to be sent
354 .  sync - type of synchronization PetscSFWindowSyncType
355 .  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
356 .  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
357 .  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
358 -  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
359 
360    Output Parameters:
361 +  target_disp - target_disp argument for RMA calls (significative for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
362 +  reqs - array of requests (significative for sync == PETSCSF_WINDOW_SYNC_LOCK only)
363 -  win - window
364 
365    Level: developer
366 .seealso: `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
367 */
368 static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
369 {
370   PetscSF_Window *w = (PetscSF_Window *)sf->data;
371   MPI_Aint        lb, lb_true, bytes, bytes_true;
372   PetscSFWinLink  link;
373 #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
374   MPI_Aint winaddr;
375   PetscInt nranks;
376 #endif
377   PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
378   PetscBool dummy[2];
379   MPI_Aint  wsize;
380 
381   PetscFunctionBegin;
382   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
383   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
384   PetscCheck(lb == 0 && lb_true == 0, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
385   PetscCheck(bytes == bytes_true, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
386   if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
387   for (link = w->wins; reuse && link; link = link->next) {
388     PetscBool winok = PETSC_FALSE;
389     if (w->flavor != link->flavor) continue;
390     switch (w->flavor) {
391     case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
392       if (array == link->addr) {
393         if (PetscDefined(USE_DEBUG)) {
394           dummy[0] = PETSC_TRUE;
395           dummy[1] = PETSC_TRUE;
396           PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
397           PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
398           PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
399         }
400         PetscCheck(!link->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window in use");
401         PetscCheck(!epoch || !link->epoch, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window epoch not finished");
402         winok       = PETSC_TRUE;
403         link->paddr = array;
404       } else if (PetscDefined(USE_DEBUG)) {
405         dummy[0] = PETSC_FALSE;
406         dummy[1] = PETSC_FALSE;
407         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
408         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
409         PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
410       }
411       break;
412     case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
413     case PETSCSF_WINDOW_FLAVOR_SHARED:
414       if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
415         update      = PETSC_TRUE;
416         link->paddr = array;
417         winok       = PETSC_TRUE;
418       }
419       break;
420     default:
421       SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
422     }
423     if (winok) {
424       *win = link->win;
425       PetscCall(PetscInfo(sf, "Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
426       goto found;
427     }
428   }
429 
430   wsize = (MPI_Aint)bytes * sf->nroots;
431   PetscCall(PetscNew(&link));
432   link->bytes           = bytes;
433   link->next            = w->wins;
434   link->flavor          = w->flavor;
435   link->dyn_target_addr = NULL;
436   link->reqs            = NULL;
437   w->wins               = link;
438   if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
439     PetscInt i;
440 
441     PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
442     for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
443   }
444   switch (w->flavor) {
445   case PETSCSF_WINDOW_FLAVOR_CREATE:
446     PetscCallMPI(MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win));
447     link->addr  = array;
448     link->paddr = array;
449     break;
450 #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
451   case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
452     PetscCallMPI(MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win));
453   #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
454     PetscCallMPI(MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize));
455   #else
456     PetscCallMPI(MPI_Win_attach(link->win, array, wsize));
457   #endif
458     link->addr  = array;
459     link->paddr = array;
460     PetscCheck(w->dynsf, PetscObjectComm((PetscObject)sf), PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
461     PetscCall(PetscSFSetUp(w->dynsf));
462     PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
463     PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
464     PetscCallMPI(MPI_Get_address(array, &winaddr));
465     PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
466     PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
467     break;
468   case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
469     PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
470     update      = PETSC_TRUE;
471     link->paddr = array;
472     break;
473 #endif
474 #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
475   case PETSCSF_WINDOW_FLAVOR_SHARED:
476     PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
477     update      = PETSC_TRUE;
478     link->paddr = array;
479     break;
480 #endif
481   default:
482     SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
483   }
484   PetscCall(PetscInfo(sf, "New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
485   *win = link->win;
486 
487 found:
488 
489   if (target_disp) *target_disp = link->dyn_target_addr;
490   if (reqs) *reqs = link->reqs;
491   if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
492     PetscMPIInt rank;
493 
494     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
495     if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
496     PetscCall(PetscMemcpy(link->addr, array, sf->nroots * bytes));
497     if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
498       PetscCallMPI(MPI_Win_unlock(rank, *win));
499       PetscCallMPI(MPI_Win_fence(0, *win));
500     }
501   }
502   link->inuse = PETSC_TRUE;
503   link->epoch = epoch;
504   if (epoch) {
505     switch (sync) {
506     case PETSCSF_WINDOW_SYNC_FENCE:
507       PetscCallMPI(MPI_Win_fence(fenceassert, *win));
508       break;
509     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
510       break;
511     case PETSCSF_WINDOW_SYNC_ACTIVE: {
512       MPI_Group   ingroup, outgroup;
513       PetscMPIInt isize, osize;
514 
515       /* OpenMPI 4.0.2 with btl=vader does not like calling
516          - MPI_Win_complete when ogroup is empty
517          - MPI_Win_wait when igroup is empty
518          So, we do not even issue the corresponding start and post calls
519          The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
520          start(outgroup) has a matching post(ingroup)
521          and this is guaranteed by PetscSF
522       */
523       PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
524       PetscCallMPI(MPI_Group_size(ingroup, &isize));
525       PetscCallMPI(MPI_Group_size(outgroup, &osize));
526       if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
527       if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
528     } break;
529     default:
530       SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
531     }
532   }
533   PetscFunctionReturn(0);
534 }
535 
536 /*
537    PetscSFFindWindow - Finds a window that is already in use
538 
539    Not Collective
540 
541    Input Parameters:
542 +  sf - star forest
543 .  unit - data type
544 -  array - array with which the window is associated
545 
546    Output Parameters:
547 +  win - window
548 -  reqs - outstanding requests associated to the window
549 
550    Level: developer
551 
552 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
553 */
554 static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs)
555 {
556   PetscSF_Window *w = (PetscSF_Window *)sf->data;
557   PetscSFWinLink  link;
558 
559   PetscFunctionBegin;
560   *win = MPI_WIN_NULL;
561   for (link = w->wins; link; link = link->next) {
562     if (array == link->paddr) {
563       PetscCall(PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
564       *win  = link->win;
565       *reqs = link->reqs;
566       PetscFunctionReturn(0);
567     }
568   }
569   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
570 }
571 
572 /*
573    PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
574 
575    Collective
576 
577    Input Parameters:
578 +  sf - star forest
579 .  unit - data type
580 .  array - array associated with window
581 .  sync - type of synchronization PetscSFWindowSyncType
582 .  epoch - close an epoch, must match argument to PetscSFGetWindow()
583 .  update - if we have to update the local window array
584 -  win - window
585 
586    Level: developer
587 
588 .seealso: `PetscSFFindWindow()`
589 */
590 static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
591 {
592   PetscSF_Window         *w = (PetscSF_Window *)sf->data;
593   PetscSFWinLink         *p, link;
594   PetscBool               reuse = PETSC_FALSE;
595   PetscSFWindowFlavorType flavor;
596   void                   *laddr;
597   size_t                  bytes;
598 
599   PetscFunctionBegin;
600   for (p = &w->wins; *p; p = &(*p)->next) {
601     link = *p;
602     if (*win == link->win) {
603       PetscCheck(array == link->paddr, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
604       if (epoch != link->epoch) {
605         PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
606         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
607       }
608       laddr  = link->addr;
609       flavor = link->flavor;
610       bytes  = link->bytes;
611       if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
612       else {
613         *p     = link->next;
614         update = PETSC_FALSE;
615       } /* remove from list */
616       goto found;
617     }
618   }
619   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
620 
621 found:
622   PetscCall(PetscInfo(sf, "Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n", link->win, link->flavor, PetscObjectComm((PetscObject)sf)));
623   if (epoch) {
624     switch (sync) {
625     case PETSCSF_WINDOW_SYNC_FENCE:
626       PetscCallMPI(MPI_Win_fence(fenceassert, *win));
627       break;
628     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
629       break;
630     case PETSCSF_WINDOW_SYNC_ACTIVE: {
631       MPI_Group   ingroup, outgroup;
632       PetscMPIInt isize, osize;
633 
634       /* OpenMPI 4.0.2 with btl=wader does not like calling
635          - MPI_Win_complete when ogroup is empty
636          - MPI_Win_wait when igroup is empty
637          The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
638          - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
639          - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
640       */
641       PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
642       PetscCallMPI(MPI_Group_size(ingroup, &isize));
643       PetscCallMPI(MPI_Group_size(outgroup, &osize));
644       if (osize) PetscCallMPI(MPI_Win_complete(*win));
645       if (isize) PetscCallMPI(MPI_Win_wait(*win));
646     } break;
647     default:
648       SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
649     }
650   }
651   if (update) {
652     if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
653     PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
654   }
655   link->epoch = PETSC_FALSE;
656   link->inuse = PETSC_FALSE;
657   link->paddr = NULL;
658   if (!reuse) {
659     PetscCall(PetscFree(link->dyn_target_addr));
660     PetscCall(PetscFree(link->reqs));
661     PetscCallMPI(MPI_Win_free(&link->win));
662     PetscCall(PetscFree(link));
663     *win = MPI_WIN_NULL;
664   }
665   PetscFunctionReturn(0);
666 }
667 
/* Set up the window SF: compute root ranks and build the helper SF used by the DYNAMIC flavor */
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
  if (!w->dynsf) {
    PetscInt     i;
    PetscSFNode *remotes;

    /* Build a one-root-per-rank SF so window base addresses can be broadcast (see PetscSFGetWindow) */
    PetscCall(PetscMalloc1(sf->nranks, &remotes));
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf));
    PetscCall(PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
    PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* results are unused here; presumably this primes the group computation before the first epoch — confirm in PetscSFGetGroups() */
    PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
    /* fall through */
  default:
    break;
  }
  PetscFunctionReturn(0);
}
696 
697 static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
698 {
699   PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
700   PetscSFWindowFlavorType flavor = w->flavor;
701 
702   PetscFunctionBegin;
703   PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
704   PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
705   PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
706   PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
707   PetscOptionsHeadEnd();
708   PetscFunctionReturn(0);
709 }
710 
/* Release all cached resources: the datatype cache, every window, the helper SF, and the info handle */
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the datatype cache built by PetscSFWindowGetDataTypes() */
  for (link = w->link; link; link = next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i = 0; i < sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine, link->remote));
    PetscCall(PetscFree(link));
  }
  w->link = NULL;
  /* Destroy all windows; it is an error to reset while any window is still checked out */
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;
    PetscCheck(!wlink->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
    PetscCall(PetscFree(wlink->dyn_target_addr));
    PetscCall(PetscFree(wlink->reqs));
    PetscCallMPI(MPI_Win_free(&wlink->win));
    PetscCall(PetscFree(wlink));
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  PetscFunctionReturn(0);
}
743 
/* Destroy the window SF: release cached state, free the implementation struct, and
   clear the composed methods so no dangling function pointers remain on the object */
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Window(sf));
  PetscCall(PetscFree(sf->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
  PetscFunctionReturn(0);
}
757 
/* ASCII viewer for the window SF: prints flavor/sync settings, and the info key-value pairs in detail mode */
static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Window   *w = (PetscSF_Window *)sf->data;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscViewerGetFormat(viewer, &format));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k, nkeys;
        char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          /* Each key returned by MPI_Info_get_nthkey must have a value; missing one is a library bug */
          PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
          PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
          PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
          PetscCall(PetscViewerASCIIPrintf(viewer, "      %s = %s\n", key, value));
        }
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "    current info=MPI_INFO_NULL\n"));
      }
    }
  }
  PetscFunctionReturn(0);
}
791 
792 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
793 {
794   PetscSF_Window       *w = (PetscSF_Window *)sf->data;
795   PetscSFWindowSyncType synctype;
796 
797   PetscFunctionBegin;
798   synctype = w->sync;
799   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
800   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
801   PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
802   PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
803   PetscCall(PetscSFWindowSetInfo(newsf, w->info));
804   PetscFunctionReturn(0);
805 }
806 
/* Begin a root-to-leaf broadcast: open an exposure/access epoch on the root data window
   and post one MPI_Get (or MPI_Rget) per root rank to pull root values into leafdata.
   Only op == MPI_REPLACE is supported. The epoch is closed in PetscSFBcastEnd_Window. */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp; /* per-rank target displacements; NULL unless the flavor requires them (e.g. DYNAMIC) */
  const MPI_Datatype *mine, *remote;
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  /* mine[i]/remote[i] are the composite local/remote datatypes for rank i, lazily built per unit type */
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Open the epoch read-only: NOPUT/NOPRECEDE assertions let MPI optimize the fence */
  PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      /* Shared lock per target; NOCHECK because each origin accesses disjoint window regions.
         The matching unlock is deferred to PetscSFBcastEnd_Window. */
      PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
#if defined(PETSC_HAVE_MPI_RGET)
      /* Request-based get so the End routine can wait on reqs[i] instead of relying on unlock alone */
      PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
#else
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
#endif
    } else {
      /* FENCE/ACTIVE: completion is provided by the epoch-closing synchronization in End */
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    }
  }
  PetscFunctionReturn(0);
}
838 
/* Complete the broadcast started by PetscSFBcastBegin_Window: wait for any request-based
   gets, release the per-rank locks (LOCK sync only), and close the window epoch. */
PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  /* Locate the window that Begin associated with this (unit, rootdata) pair */
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  /* reqs is non-NULL only when Begin posted request-based operations (MPI_Rget path) */
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt           i, nranks;
    const PetscMPIInt *ranks;

    /* Release the shared locks acquired per root rank in Begin */
    PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
    for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  /* Close the epoch; NOSTORE/NOSUCCEED assert that no local stores happened and no RMA epoch follows */
  PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
  PetscFunctionReturn(0);
}
858 
/* Begin a leaf-to-root reduction: open an epoch on the root data window and post one
   MPI_Accumulate per root rank combining leafdata into the remote root buffers with op.
   The epoch is closed in PetscSFReduceEnd_Window. */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp; /* per-rank target displacements; NULL unless the flavor requires them */
  const MPI_Datatype *mine, *remote;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Map MPIU_* reduction ops to the built-in MPI ops usable with MPI_Accumulate */
  PetscCall(PetscSFWindowOpTranslate(&op));
  /* No request array here (NULL): accumulates complete via unlock or the epoch-closing sync */
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    /* Unlike BcastBegin, each lock is released immediately after the accumulate is posted */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  PetscFunctionReturn(0);
}
882 
/* Complete the reduction started by PetscSFReduceBegin_Window: wait for any outstanding
   requests and close the window epoch, updating rootdata from the window. */
static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  /* ReduceBegin passed NULL for reqs, so this wait is normally a no-op; kept for symmetry with Bcast */
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  /* PETSC_TRUE (final arg group): the window contents were modified remotely, so copy back to rootdata */
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
  PetscFunctionReturn(0);
}
895 
/* Begin an atomic fetch-and-op: for each root rank, fetch the current root values into
   leafupdate and combine leafdata into the roots with op. With MPI_Get_accumulate this is
   a single atomic call; without it, exclusive locks serialize a separate Get + Accumulate. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine, *remote;
  const MPI_Aint     *target_disp;
  MPI_Win             win;
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf; /* saved so the temporary flavor override below can be undone */
#endif

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Map MPIU_* ops to built-in MPI ops valid inside accumulate operations */
  PetscCall(PetscSFWindowOpTranslate(&op));
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf      = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  /* epoch=PETSC_FALSE: no fence epoch is opened; all synchronization is via the per-rank locks below */
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win));
#else
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
#endif
  for (i = 0; i < nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* Exclusive lock makes the Get followed by Accumulate atomic with respect to other origins */
    PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win));
    PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#else
    /* MPI_Get_accumulate is itself atomic, so a shared lock suffices under LOCK sync */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win));
    PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf; /* restore the user-selected flavor */
#endif
  PetscFunctionReturn(0);
}
940 
/* Complete the fetch-and-op started by PetscSFFetchAndOpBegin_Window, closing the epoch
   with parameters that mirror the Begin-side #ifdef (LOCK sync, no epoch, when the
   MPI_Get_accumulate-less fallback path was used). */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  MPI_Win win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window *)sf->data; /* only needed for w->sync on this path */
#endif
  MPI_Request *reqs = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* Final PETSC_TRUE: roots were modified remotely, so the window contents update rootdata */
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
#else
  /* Must match the Begin side: LOCK sync and no fence epoch on the fallback path */
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win));
#endif
  PetscFunctionReturn(0);
}
959 
960 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
961 {
962   PetscSF_Window *w = (PetscSF_Window *)sf->data;
963 
964   PetscFunctionBegin;
965   sf->ops->SetUp           = PetscSFSetUp_Window;
966   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
967   sf->ops->Reset           = PetscSFReset_Window;
968   sf->ops->Destroy         = PetscSFDestroy_Window;
969   sf->ops->View            = PetscSFView_Window;
970   sf->ops->Duplicate       = PetscSFDuplicate_Window;
971   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
972   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
973   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
974   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
975   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
976   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
977 
978   PetscCall(PetscNew(&w));
979   sf->data  = (void *)w;
980   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
981   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
982   w->info   = MPI_INFO_NULL;
983 
984   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window));
985   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window));
986   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window));
987   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window));
988   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window));
989   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window));
990 
991 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
992   {
993     PetscBool ackbug = PETSC_FALSE;
994     PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL));
995     if (ackbug) {
996       PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
997     } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
998   }
999 #endif
1000   PetscFunctionReturn(0);
1001 }
1002