xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision feefa0e191a340680bb02e1467a36facdcb0b150)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

/* Implementation data for a PetscSF of type PETSCSFWINDOW */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* helper SF used to broadcast per-rank window base addresses (DYNAMIC flavor) */
  MPI_Info                info;   /* info passed to window creation calls; MPI_INFO_NULL unless PetscSFWindowSetInfo() stored a duplicate */
} PetscSF_Window;

/* Cache node for the composite MPI datatypes built for one unit type (one mine/remote pair per root rank) */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* private dup of the user's unit type; serves as the cache key */
  MPI_Datatype   *mine;   /* per-rank indexed-block types into the local leaf buffer */
  MPI_Datatype   *remote; /* per-rank indexed-block types into the remote root buffer */
  PetscSFDataLink next;   /* singly linked cache list */
};

/* One MPI window plus the bookkeeping needed to reuse it across SF operations */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window currently checked out via PetscSFGetWindow() */
  size_t                  bytes;           /* extent in bytes of one unit (window size is bytes * nroots) */
  void                   *addr;            /* base of the window memory (equals the user array for CREATE/DYNAMIC) */
  void                   *paddr;           /* user array currently associated with this window (NULL when not in use) */
  MPI_Win                 win;             /* the MPI window handle */
  MPI_Request            *reqs;            /* per-rank requests, allocated for PETSCSF_WINDOW_SYNC_LOCK only */
  PetscSFWindowFlavorType flavor;          /* flavor this window was created with */
  MPI_Aint               *dyn_target_addr; /* per-root-rank remote base addresses (DYNAMIC flavor only) */
  PetscBool               epoch;           /* an access/exposure epoch is currently open on this window */
  PetscSFWinLink          next;            /* singly linked list of windows */
};

/* Human-readable names matching the PetscSFWindowSyncType / PetscSFWindowFlavorType enums */
const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40 {
41   PetscFunctionBegin;
42   if (*op == MPIU_SUM) *op = MPI_SUM;
43   else if (*op == MPIU_MAX) *op = MPI_MAX;
44   else if (*op == MPIU_MIN) *op = MPI_MIN;
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /*@C
49    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
50 
51    Not Collective
52 
53    Input Parameters:
54 +  sf - star forest of type `PETSCSFWINDOW`
55 -  unit - data type for each node
56 
57    Output Parameters:
58 +  localtypes - types describing part of local leaf buffer referencing each remote rank
59 -  remotetypes - types describing part of remote root buffer referenced for each remote rank
60 
61    Level: developer
62 
63 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
64 @*/
65 static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
66 {
67   PetscSF_Window    *w = (PetscSF_Window *)sf->data;
68   PetscSFDataLink    link;
69   PetscInt           i, nranks;
70   const PetscInt    *roffset, *rmine, *rremote;
71   const PetscMPIInt *ranks;
72 
73   PetscFunctionBegin;
74   /* Look for types in cache */
75   for (link = w->link; link; link = link->next) {
76     PetscBool match;
77     PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
78     if (match) {
79       *localtypes  = link->mine;
80       *remotetypes = link->remote;
81       PetscFunctionReturn(PETSC_SUCCESS);
82     }
83   }
84 
85   /* Create new composite types for each send rank */
86   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
87   PetscCall(PetscNew(&link));
88   PetscCallMPI(MPI_Type_dup(unit, &link->unit));
89   PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
90   for (i = 0; i < nranks; i++) {
91     PetscInt     rcount = roffset[i + 1] - roffset[i];
92     PetscMPIInt *rmine, *rremote;
93 #if !defined(PETSC_USE_64BIT_INDICES)
94     rmine   = sf->rmine + sf->roffset[i];
95     rremote = sf->rremote + sf->roffset[i];
96 #else
97     PetscInt j;
98     PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
99     for (j = 0; j < rcount; j++) {
100       PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
101       PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
102     }
103 #endif
104 
105     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
106     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
107 #if defined(PETSC_USE_64BIT_INDICES)
108     PetscCall(PetscFree2(rmine, rremote));
109 #endif
110     PetscCallMPI(MPI_Type_commit(&link->mine[i]));
111     PetscCallMPI(MPI_Type_commit(&link->remote[i]));
112   }
113   link->next = w->link;
114   w->link    = link;
115 
116   *localtypes  = link->mine;
117   *remotetypes = link->remote;
118   PetscFunctionReturn(PETSC_SUCCESS);
119 }
120 
121 /*@C
122   PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation
123 
124   Logically Collective
125 
126   Input Parameters:
127 + sf     - star forest for communication of type `PETSCSFWINDOW`
128 - flavor - flavor type
129 
130   Options Database Key:
131 . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see `PetscSFWindowFlavorType`)
132 
133   Level: advanced
134 
135   Notes:
136 
137   Windows reuse follows these rules:
138 .vb
139      PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
140 
141      PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
142        for i=1 to K
143          PetscSFOperationBegin(rootdata1,leafdata_whatever);
144          PetscSFOperationEnd(rootdata1,leafdata_whatever);
145          ...
146          PetscSFOperationBegin(rootdataN,leafdata_whatever);
147          PetscSFOperationEnd(rootdataN,leafdata_whatever);
148        endfor
149        The following pattern will instead raise an error
150          PetscSFOperationBegin(rootdata1,leafdata_whatever);
151          PetscSFOperationEnd(rootdata1,leafdata_whatever);
152          PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
153          PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
154 
155      PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use
156 
157      PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
158 .ve
159 
160 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
161 @*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, flavor, 2);
  /* PetscTryMethod: silently a no-op if the SF type did not compose this method */
  PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}
170 
/* Type-specific implementation backing PetscSFWindowSetFlavorType() */
static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->flavor = flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}
179 
180 /*@C
181   PetscSFWindowGetFlavorType - Get  `PETSCSFWINDOW` flavor type for `PetscSF` communication
182 
183   Logically Collective
184 
185   Input Parameter:
186 . sf - star forest for communication of type `PETSCSFWINDOW`
187 
188   Output Parameter:
189 . flavor - flavor type
190 
191   Level: advanced
192 
193 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
194 @*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(flavor, 2);
  /* PetscUseMethod: errors if the SF type does not provide this query */
  PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
  PetscFunctionReturn(PETSC_SUCCESS);
}
203 
/* Type-specific implementation backing PetscSFWindowGetFlavorType() */
static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *flavor = w->flavor;
  PetscFunctionReturn(PETSC_SUCCESS);
}
212 
213 /*@C
214   PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type  `PETSCSFWINDOW`
215 
216   Logically Collective
217 
218   Input Parameters:
219 + sf   - star forest for communication
220 - sync - synchronization type
221 
222   Options Database Key:
223 . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)
224 
225   Level: advanced
226 
227 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
228 @*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, sync, 2);
  /* PetscTryMethod: silently a no-op if the SF type did not compose this method */
  PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}
237 
/* Type-specific implementation backing PetscSFWindowSetSyncType() */
static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  w->sync = sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}
246 
247 /*@C
248   PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`
249 
250   Logically Collective
251 
252   Input Parameter:
253 . sf - star forest for communication
254 
255   Output Parameter:
256 . sync - synchronization type
257 
258   Level: advanced
259 
260 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
261 @*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(sync, 2);
  /* PetscUseMethod: errors if the SF type does not provide this query */
  PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
  PetscFunctionReturn(PETSC_SUCCESS);
}
270 
/* Type-specific implementation backing PetscSFWindowGetSyncType() */
static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *sync = w->sync;
  PetscFunctionReturn(PETSC_SUCCESS);
}
279 
280 /*@C
281   PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent windows allocation
282 
283   Logically Collective
284 
285   Input Parameters:
286 + sf   - star forest for communication
287 - info - `MPI_Info` handle
288 
289   Level: advanced
290 
291   Note:
292   The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.
293 
294 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
295 @*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* PetscTryMethod: silently a no-op if the SF type did not compose this method */
  PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
303 
/* Type-specific implementation backing PetscSFWindowSetInfo():
   releases any previously stored info, then keeps a private duplicate of the new handle
   (MPI_Info_free resets w->info to MPI_INFO_NULL, so passing MPI_INFO_NULL clears it). */
static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
313 
314 /*@C
315   PetscSFWindowGetInfo - Get the `MPI_Info` handle used for windows allocation
316 
317   Logically Collective
318 
319   Input Parameter:
320 . sf - star forest for communication
321 
322   Output Parameter:
323 . info - `MPI_Info` handle
324 
325   Level: advanced
326 
327   Note:
328   If `PetscSFWindowSetInfo()` has not be called, this returns `MPI_INFO_NULL`
329 
330 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
331 @*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(info, 2);
  /* PetscUseMethod: errors if the SF type does not provide this query */
  PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
340 
/* Type-specific implementation backing PetscSFWindowGetInfo() */
static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;

  PetscFunctionBegin;
  *info = w->info; /* MPI_INFO_NULL unless PetscSFWindowSetInfo() stored a duplicate */
  PetscFunctionReturn(PETSC_SUCCESS);
}
349 
/*
   PetscSFGetWindow - Get a window for use with a given data type

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array to be sent
.  sync - type of synchronization `PetscSFWindowSyncType`
.  epoch - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
.  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
.  postassert - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
-  startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`

   Output Parameters:
+  target_disp - target_disp argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
.  reqs - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
-  win - window

   Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Aint        lb, lb_true, bytes, bytes_true;
  PetscSFWinLink  link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2];
  MPI_Aint  wsize;

  PetscFunctionBegin;
  /* Only simple unit types are supported: zero lower bound and extent equal to the true extent */
  PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
  PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
  PetscCheck(lb == 0 && lb_true == 0, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  PetscCheck(bytes == bytes_true, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* All flavors except CREATE attempt to reuse a cached window */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link = w->wins; reuse && link; link = link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* Collective consistency check: LAND result must equal LOR result, i.e. either every
             process matched this link or none did */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
          PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
          PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        PetscCheck(!link->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window in use");
        PetscCheck(!epoch || !link->epoch, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window epoch not finished");
        winok       = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* Non-matching processes must participate in the same collectives as matching ones */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
        PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update      = PETSC_TRUE; /* window owns separate memory, must be refreshed from array below */
        link->paddr = array;
        winok       = PETSC_TRUE;
      }
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
      goto found;
    }
  }

  /* No reusable window: create a new link and a window of the requested flavor */
  wsize = (MPI_Aint)bytes * sf->nroots;
  PetscCall(PetscNew(&link));
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    /* One request slot per root rank, used by the LOCK synchronization path */
    PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    /* Window exposes the user array directly */
    PetscCallMPI(MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win));
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    PetscCallMPI(MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win));
  #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    PetscCallMPI(MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize));
  #else
    PetscCallMPI(MPI_Win_attach(link->win, array, wsize));
  #endif
    link->addr  = array;
    link->paddr = array;
    PetscCheck(w->dynsf, PetscObjectComm((PetscObject)sf), PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
    PetscCall(PetscSFSetUp(w->dynsf));
    /* Broadcast each root rank's attached base address so RMA calls can use it as target_disp */
    PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
    PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
    PetscCallMPI(MPI_Get_address(array, &winaddr));
    PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    /* MPI allocates the window memory; copy user data in below (update) */
    PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
    update      = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default:
    SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guarantees memory-synchronization */
    PetscMPIInt rank;

    /* Refresh the MPI-owned window memory from the user's root array */
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
    PetscCall(PetscMemcpy(link->addr, array, sf->nroots * bytes));
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_unlock(rank, *win));
      PetscCallMPI(MPI_Win_fence(0, *win));
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    /* Open the access/exposure epoch according to the synchronization type */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
      if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
541 
542 /*
543    PetscSFFindWindow - Finds a window that is already in use
544 
545    Not Collective
546 
547    Input Parameters:
548 +  sf - star forest
549 .  unit - data type
550 -  array - array with which the window is associated
551 
552    Output Parameters:
553 +  win - window
554 -  reqs - outstanding requests associated to the window
555 
556    Level: developer
557 
558 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
559 */
560 static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs)
561 {
562   PetscSF_Window *w = (PetscSF_Window *)sf->data;
563   PetscSFWinLink  link;
564 
565   PetscFunctionBegin;
566   *win = MPI_WIN_NULL;
567   for (link = w->wins; link; link = link->next) {
568     if (array == link->paddr) {
569       PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
570       *win  = link->win;
571       *reqs = link->reqs;
572       PetscFunctionReturn(PETSC_SUCCESS);
573     }
574   }
575   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
576 }
577 
/*
   PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  array - array associated with window
.  sync - type of synchronization `PetscSFWindowSyncType`
.  epoch - close an epoch, must match argument to `PetscSFGetWindow()`
.  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
.  update - if we have to update the local window array
-  win - window

   Level: developer

.seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
{
  PetscSF_Window         *w = (PetscSF_Window *)sf->data;
  PetscSFWinLink         *p, link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void                   *laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Locate the link for this window; p tracks the owning next-pointer so the link can be unspliced */
  for (p = &w->wins; *p; p = &(*p)->next) {
    link = *p;
    if (*win == link->win) {
      PetscCheck(array == link->paddr, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
      if (epoch != link->epoch) {
        PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
      }
      laddr  = link->addr;
      flavor = link->flavor;
      bytes  = link->bytes;
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else {
        /* CREATE windows are never cached: unsplice now; no copy-back needed since the
           window memory is the user array itself */
        *p     = link->next;
        update = PETSC_FALSE;
      } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

found:
  PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
  if (epoch) {
    /* Close the access/exposure epoch according to the synchronization type */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      PetscCallMPI(MPI_Win_fence(fenceassert, *win));
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup, outgroup;
      PetscMPIInt isize, osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
      PetscCallMPI(MPI_Group_size(ingroup, &isize));
      PetscCallMPI(MPI_Group_size(outgroup, &osize));
      if (osize) PetscCallMPI(MPI_Win_complete(*win));
      if (isize) PetscCallMPI(MPI_Win_wait(*win));
    } break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
    }
  }
  if (update) {
    /* Copy root data back from the MPI-owned window memory to the user array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
    PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    /* Not cached for reuse: release everything the link owns */
    PetscCall(PetscFree(link->dyn_target_addr));
    PetscCall(PetscFree(link->reqs));
    PetscCallMPI(MPI_Win_free(&link->win));
    PetscCall(PetscFree(link));
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
673 
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Group       ingroup, outgroup;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
  if (!w->dynsf) {
    /* Build the helper SF (one root per process) used by the DYNAMIC flavor to
       broadcast per-rank window base addresses */
    PetscInt     i;
    PetscSFNode *remotes;

    PetscCall(PetscMalloc1(sf->nranks, &remotes));
    for (i = 0; i < sf->nranks; i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf));
    PetscCall(PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
    PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* The group handles are not used here; NOTE(review): presumably this call warms a
       cache inside PetscSFGetGroups() so later epochs avoid the setup cost — confirm */
    PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
    /* fallthrough */
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
702 
static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
{
  PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
  PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
  PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
  /* The flavor is applied through the API rather than written into w->flavor directly */
  PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
716 
/* Release all cached composite datatypes and all cached windows, the dynamic-address SF, and the info object */
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  PetscSFDataLink link, next;
  PetscSFWinLink  wlink, wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the per-unit datatype cache (one mine/remote pair per root rank in each entry) */
  for (link = w->link; link; link = next) {
    next = link->next;
    PetscCallMPI(MPI_Type_free(&link->unit));
    for (i = 0; i < sf->nranks; i++) {
      PetscCallMPI(MPI_Type_free(&link->mine[i]));
      PetscCallMPI(MPI_Type_free(&link->remote[i]));
    }
    PetscCall(PetscFree2(link->mine, link->remote));
    PetscCall(PetscFree(link));
  }
  w->link = NULL;
  /* Free cached windows; it is an error for any to still be checked out */
  for (wlink = w->wins; wlink; wlink = wnext) {
    wnext = wlink->next;
    PetscCheck(!wlink->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
    PetscCall(PetscFree(wlink->dyn_target_addr));
    PetscCall(PetscFree(wlink->reqs));
    PetscCallMPI(MPI_Win_free(&wlink->win));
    PetscCall(PetscFree(wlink));
  }
  w->wins = NULL;
  PetscCall(PetscSFDestroy(&w->dynsf));
  if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
  PetscFunctionReturn(PETSC_SUCCESS);
}
749 
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Window(sf)); /* releases cached datatypes, windows, dynsf, and info */
  PetscCall(PetscFree(sf->data));
  /* Remove the window-specific methods composed with the object */
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
763 
764 static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
765 {
766   PetscSF_Window   *w = (PetscSF_Window *)sf->data;
767   PetscBool         iascii;
768   PetscViewerFormat format;
769 
770   PetscFunctionBegin;
771   PetscCall(PetscViewerGetFormat(viewer, &format));
772   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
773   if (iascii) {
774     PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
775     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
776       if (w->info != MPI_INFO_NULL) {
777         PetscMPIInt k, nkeys;
778         char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
779 
780         PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
781         PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
782         for (k = 0; k < nkeys; k++) {
783           PetscMPIInt flag;
784 
785           PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
786           PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
787           PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
788           PetscCall(PetscViewerASCIIPrintf(viewer, "      %s = %s\n", key, value));
789         }
790       } else {
791         PetscCall(PetscViewerASCIIPrintf(viewer, "    current info=MPI_INFO_NULL\n"));
792       }
793     }
794   }
795   PetscFunctionReturn(PETSC_SUCCESS);
796 }
797 
798 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
799 {
800   PetscSF_Window       *w = (PetscSF_Window *)sf->data;
801   PetscSFWindowSyncType synctype;
802 
803   PetscFunctionBegin;
804   synctype = w->sync;
805   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
806   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
807   PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
808   PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
809   PetscCall(PetscSFWindowSetInfo(newsf, w->info));
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
/* Start a root-to-leaf broadcast: each process issues one MPI_Get per root
   rank, pulling root values straight into leafdata through the cached window.
   Only op == MPI_REPLACE is supported. The matching completion (waits and
   epoch close) happens in PetscSFBcastEnd_Window(). */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Request        *reqs;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Attach a window to rootdata and open an access epoch; the assertions tell
     MPI that no local stores/puts will happen during this epoch (get-only). */
  PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
  for (i = 0; i < nranks; i++) {
    /* target_disp is only non-NULL for the DYNAMIC flavor, where each target's base address differs */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
#if defined(PETSC_HAVE_MPI_RGET)
      /* Request-based get: reqs[i] is completed by MPI_Waitall in BcastEnd */
      PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
#else
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
#endif
    } else {
      /* FENCE/ACTIVE: epoch-wide synchronization completes the get in BcastEnd */
      PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
844 
/* Complete the broadcast started by PetscSFBcastBegin_Window: wait on any
   request-based gets, unlock per-rank locks for the LOCK sync type, and close
   the access epoch / return the cached window. */
PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  /* Locate the window that BcastBegin attached to this (unit, rootdata) pair */
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  /* reqs is non-NULL only when request-based gets (MPI_Rget) were issued */
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt           i, nranks;
    const PetscMPIInt *ranks;

    /* Release the shared lock taken on each root rank in BcastBegin */
    PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
    for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  /* Close the epoch; assertions declare no stores happened and none will follow */
  PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
  PetscFunctionReturn(PETSC_SUCCESS);
}
864 
/* Start a leaf-to-root reduction: each process issues one MPI_Accumulate per
   root rank, combining leafdata into the roots' window memory with op.
   Completion and epoch close happen in PetscSFReduceEnd_Window(). */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine, *remote;
  MPI_Win             win;

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Map PETSc's MPIU_* reduction ops to the built-in MPI ops accepted by MPI_Accumulate */
  PetscCall(PetscSFWindowOpTranslate(&op));
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
  for (i = 0; i < nranks; i++) {
    /* target_disp is only non-NULL for the DYNAMIC flavor (per-target base addresses) */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    /* With LOCK sync each accumulate is bracketed by its own shared lock/unlock;
       otherwise the surrounding fence/active epoch provides synchronization */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
888 
/* Complete the reduction started by PetscSFReduceBegin_Window: wait on any
   outstanding requests and close the exposure/access epoch, updating rootdata
   from the window (last argument PETSC_TRUE). */
static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
  MPI_Win         win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  /* Locate the window that ReduceBegin attached to this (unit, rootdata) pair */
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
  PetscFunctionReturn(PETSC_SUCCESS);
}
901 
/* Start a fetch-and-op: atomically fetch current root values into leafupdate
   and combine leafdata into the roots with op. With MPI_Get_accumulate the
   fetch+combine is a single atomic call; without it we emulate the pair with
   an exclusive lock around separate MPI_Get and MPI_Accumulate calls. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscInt            i, nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine, *remote;
  const MPI_Aint     *target_disp;
  MPI_Win             win;
  PetscSF_Window     *w = (PetscSF_Window *)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf;
#endif

  PetscFunctionBegin;
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
  PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
  /* Map PETSc's MPIU_* reduction ops to built-in MPI ops */
  PetscCall(PetscSFWindowOpTranslate(&op));
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf      = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win));
#else
  PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
#endif
  for (i = 0; i < nranks; i++) {
    /* target_disp is only non-NULL for the DYNAMIC flavor (per-target base addresses) */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* Exclusive lock makes the get-then-accumulate pair atomic with respect to other processes */
    PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win));
    PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
    PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#else
    /* MPI_Get_accumulate fetches the old root values into leafupdate and applies op in one atomic call */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win));
    PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* Restore the user's flavor, which was overridden above for this call only */
  w->flavor = oldf;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
946 
/* Complete the fetch-and-op started by PetscSFFetchAndOpBegin_Window. The
   sync/flavor arguments to PetscSFRestoreWindow must mirror the ones used in
   Begin: LOCK sync with a freshly created window when MPI_Get_accumulate is
   unavailable, the user's sync type otherwise. */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  MPI_Win win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window *)sf->data;
#endif
  MPI_Request *reqs = NULL;

  PetscFunctionBegin;
  /* Locate the window that FetchAndOpBegin attached to this (unit, rootdata) pair */
  PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
  if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
#else
  PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
965 
966 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
967 {
968   PetscSF_Window *w = (PetscSF_Window *)sf->data;
969 
970   PetscFunctionBegin;
971   sf->ops->SetUp           = PetscSFSetUp_Window;
972   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
973   sf->ops->Reset           = PetscSFReset_Window;
974   sf->ops->Destroy         = PetscSFDestroy_Window;
975   sf->ops->View            = PetscSFView_Window;
976   sf->ops->Duplicate       = PetscSFDuplicate_Window;
977   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
978   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
979   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
980   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
981   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
982   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
983 
984   PetscCall(PetscNew(&w));
985   sf->data  = (void *)w;
986   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
987   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
988   w->info   = MPI_INFO_NULL;
989 
990   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window));
991   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window));
992   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window));
993   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window));
994   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window));
995   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window));
996 
997 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
998   {
999     PetscBool ackbug = PETSC_FALSE;
1000     PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL));
1001     if (ackbug) {
1002       PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
1003     } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1004   }
1005 #endif
1006   PetscFunctionReturn(PETSC_SUCCESS);
1007 }
1008