xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision 98921bda46e76d7aaed9e0138c5ff9d0ce93f355)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

/* Per-SF state for the window implementation */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* One-root-per-rank SF used to broadcast window base addresses for the DYNAMIC flavor (see PetscSFGetWindow) */
  MPI_Info                info;   /* Info handle passed to MPI_Win_create/allocate; MPI_INFO_NULL unless set via PetscSFWindowSetInfo() */
} PetscSF_Window;

/* Cache node pairing a duplicated unit datatype with the per-rank indexed types built from it */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* MPI_Type_dup of the user's unit type; cache key compared with MPIPetsc_Type_compare */
  MPI_Datatype    *mine;  /* per-root-rank indexed type into the local leaf buffer */
  MPI_Datatype    *remote;/* per-root-rank indexed type into the remote root buffer */
  PetscSFDataLink next;   /* singly-linked cache; freed in PetscSFReset_Window */
};

/* One allocated/attached MPI window plus its bookkeeping */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window currently checked out by PetscSFGetWindow */
  size_t                  bytes;           /* extent of the unit type used at creation; reuse key for ALLOCATE/SHARED */
  void                    *addr;           /* memory exposed by the window (user array or MPI-allocated) */
  void                    *paddr;          /* user array currently associated with the window */
  MPI_Win                 win;
  MPI_Request             *reqs;           /* per-rank requests, allocated only for SYNC_LOCK */
  PetscSFWindowFlavorType flavor;          /* flavor the window was created with */
  MPI_Aint                *dyn_target_addr;/* per-rank target displacements, DYNAMIC flavor only (NULL otherwise) */
  PetscBool               epoch;           /* an access/exposure epoch is open on this window */
  PetscSFWinLink          next;
};

/* Human-readable names for the enums above; terminated per PetscEnum convention */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40 {
41   PetscFunctionBegin;
42   if (*op == MPIU_SUM) *op = MPI_SUM;
43   else if (*op == MPIU_MAX) *op = MPI_MAX;
44   else if (*op == MPIU_MIN) *op = MPI_MIN;
45   PetscFunctionReturn(0);
46 }
47 
48 /*@C
49    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
50 
51    Not Collective
52 
53    Input Parameters:
54 +  sf - star forest
55 -  unit - data type for each node
56 
57    Output Parameters:
58 +  localtypes - types describing part of local leaf buffer referencing each remote rank
59 -  remotetypes - types describing part of remote root buffer referenced for each remote rank
60 
61    Level: developer
62 
63 .seealso: PetscSFSetGraph(), PetscSFView()
64 @*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window*)sf->data;
  PetscErrorCode    ierr;
  PetscSFDataLink   link;
  PetscInt          i,nranks;
  const PetscInt    *roffset,*rmine,*rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache; the cache is only emptied in PetscSFReset_Window */
  for (link=w->link; link; link=link->next) {
    PetscBool match;
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(0);
    }
  }

  /* Create new composite types for each send rank */
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
  ierr = PetscNew(&link);CHKERRQ(ierr);
  ierr = MPI_Type_dup(unit,&link->unit);CHKERRMPI(ierr);
  ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    PetscInt    rcount = roffset[i+1] - roffset[i];
    /* NOTE(review): these deliberately shadow the outer const PetscInt* rmine/rremote;
       MPI_Type_create_indexed_block needs int displacements */
    PetscMPIInt *rmine,*rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt == int here, so the SF index arrays can be used directly */
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* 64-bit PetscInt: copy with overflow-checked narrowing to int */
    PetscInt j;
    ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
    for (j=0; j<rcount; j++) {
      ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
      ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
    }
#endif

    /* blocks of length 1 of 'unit' at the leaf (mine) and root (remote) offsets for this rank */
    ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRMPI(ierr);
    ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRMPI(ierr);
#if defined(PETSC_USE_64BIT_INDICES)
    ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
#endif
    ierr = MPI_Type_commit(&link->mine[i]);CHKERRMPI(ierr);
    ierr = MPI_Type_commit(&link->remote[i]);CHKERRMPI(ierr);
  }
  /* push onto the head of the cache list */
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(0);
}
121 
122 /*@C
123    PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation
124 
125    Logically Collective
126 
127    Input Parameters:
128 +  sf - star forest for communication
129 -  flavor - flavor type
130 
131    Options Database Key:
132 .  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)
133 
134    Level: advanced
135 
136    Notes: Windows reusage follow this rules:
137 
138      PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
139 
140      PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
141        for i=1 to K
142          PetscSFOperationBegin(rootdata1,leafdata_whatever);
143          PetscSFOperationEnd(rootdata1,leafdata_whatever);
144          ...
145          PetscSFOperationBegin(rootdataN,leafdata_whatever);
146          PetscSFOperationEnd(rootdataN,leafdata_whatever);
147        endfor
148        The following pattern will instead raise an error
149          PetscSFOperationBegin(rootdata1,leafdata_whatever);
150          PetscSFOperationEnd(rootdata1,leafdata_whatever);
151          PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
152          PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
153 
154      PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use
155 
156      PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
157 
158 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType()
159 @*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,flavor,2);
  /* PetscTryMethod: dispatch to the window implementation; silently a no-op for SF types that do not compose this method */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
170 
171 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
172 {
173   PetscSF_Window *w = (PetscSF_Window*)sf->data;
174 
175   PetscFunctionBegin;
176   w->flavor = flavor;
177   PetscFunctionReturn(0);
178 }
179 
180 /*@C
181    PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication
182 
183    Logically Collective
184 
185    Input Parameter:
186 .  sf - star forest for communication
187 
188    Output Parameter:
189 .  flavor - flavor type
190 
191    Level: advanced
192 
193 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType()
194 @*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(flavor,2);
  /* PetscUseMethod: errors if the SF type does not implement the query */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205 
206 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
207 {
208   PetscSF_Window *w = (PetscSF_Window*)sf->data;
209 
210   PetscFunctionBegin;
211   *flavor = w->flavor;
212   PetscFunctionReturn(0);
213 }
214 
215 /*@C
216    PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication
217 
218    Logically Collective
219 
220    Input Parameters:
221 +  sf - star forest for communication
222 -  sync - synchronization type
223 
224    Options Database Key:
225 .  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
226 
227    Level: advanced
228 
229 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
230 @*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,sync,2);
  /* PetscTryMethod: dispatch to the window implementation; silently a no-op for SF types that do not compose this method */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
242 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
243 {
244   PetscSF_Window *w = (PetscSF_Window*)sf->data;
245 
246   PetscFunctionBegin;
247   w->sync = sync;
248   PetscFunctionReturn(0);
249 }
250 
251 /*@C
252    PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication
253 
254    Logically Collective
255 
256    Input Parameter:
257 .  sf - star forest for communication
258 
259    Output Parameter:
260 .  sync - synchronization type
261 
262    Level: advanced
263 
264 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
265 @*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(sync,2);
  /* PetscUseMethod: errors if the SF type does not implement the query */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
276 
277 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
278 {
279   PetscSF_Window *w = (PetscSF_Window*)sf->data;
280 
281   PetscFunctionBegin;
282   *sync = w->sync;
283   PetscFunctionReturn(0);
284 }
285 
286 /*@C
287    PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation
288 
289    Logically Collective
290 
291    Input Parameters:
292 +  sf - star forest for communication
293 -  info - MPI_Info handle
294 
295    Level: advanced
296 
297    Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.
298 
299 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo()
300 @*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* PetscTryMethod: dispatch to the window implementation; silently a no-op for SF types that do not compose this method */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
310 
311 static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
312 {
313   PetscSF_Window *w = (PetscSF_Window*)sf->data;
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   if (w->info != MPI_INFO_NULL) {
318     ierr = MPI_Info_free(&w->info);CHKERRMPI(ierr);
319   }
320   if (info != MPI_INFO_NULL) {
321     ierr = MPI_Info_dup(info,&w->info);CHKERRMPI(ierr);
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 /*@C
327    PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation
328 
329    Logically Collective
330 
331    Input Parameter:
332 .  sf - star forest for communication
333 
334    Output Parameter:
335 .  info - MPI_Info handle
336 
337    Level: advanced
338 
339    Notes: if PetscSFWindowSetInfo() has not be called, this returns MPI_INFO_NULL
340 
341 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo()
342 @*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(info,2);
  /* PetscUseMethod: errors if the SF type does not implement the query */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
353 
354 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
355 {
356   PetscSF_Window *w = (PetscSF_Window*)sf->data;
357 
358   PetscFunctionBegin;
359   *info = w->info;
360   PetscFunctionReturn(0);
361 }
362 
363 /*
364    PetscSFGetWindow - Get a window for use with a given data type
365 
366    Collective on PetscSF
367 
368    Input Parameters:
369 +  sf - star forest
370 .  unit - data type
371 .  array - array to be sent
372 .  sync - type of synchronization PetscSFWindowSyncType
373 .  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
374 .  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
375 .  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
376 -  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
377 
378    Output Parameters:
379 +  target_disp - target_disp argument for RMA calls (significative for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
380 +  reqs - array of requests (significative for sync == PETSCSF_WINDOW_SYNC_LOCK only)
381 -  win - window
382 
383    Level: developer
384 .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
385 */
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Aint       lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint       winaddr;
  PetscInt       nranks;
#endif
  PetscBool      reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool      dummy[2];
  MPI_Aint       wsize;

  PetscFunctionBegin;
  /* Only simple unit types are supported: zero lower bound and extent == true extent */
  ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRMPI(ierr);
  ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRMPI(ierr);
  if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* CREATE flavor always makes a fresh window; all other flavors first scan the cached list */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link=w->wins; reuse && link; link=link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* Debug-only collective check: every rank must agree on whether the pointer matched
             (AND-reduce vs OR-reduce of the local verdict must coincide) */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
          if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        if (link->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use");
        if (epoch && link->epoch) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished");
        winok = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* Same consistency check on the non-matching side */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy  ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
        if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update = PETSC_TRUE; /* window owns its memory, so the user array must be copied in below */
        link->paddr = array;
        winok = PETSC_TRUE;
      }
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      ierr = PetscInfo3(sf,"Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
      goto found;
    }
  }

  /* No reusable window: create one and push it onto the list */
  wsize = (MPI_Aint)bytes*sf->nroots;
  ierr = PetscNew(&link);CHKERRQ(ierr);
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    /* one request slot per root rank, used by the LOCK synchronization path */
    ierr = PetscMalloc1(sf->nranks,&link->reqs);CHKERRQ(ierr);
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    ierr = MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRMPI(ierr);
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    ierr = MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRMPI(ierr);
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    ierr = MPI_Win_attach(link->win,wsize ? array : &ierr,wsize);CHKERRMPI(ierr);
#else
    ierr = MPI_Win_attach(link->win,array,wsize);CHKERRMPI(ierr);
#endif
    link->addr  = array;
    link->paddr = array;
    if (!w->dynsf) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()");
    /* each leaf rank learns the root rank's attached base address, needed as target_disp in RMA calls */
    ierr = PetscSFSetUp(w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
    ierr = PetscMalloc1(nranks,&link->dyn_target_addr);CHKERRQ(ierr);
    ierr = MPI_Get_address(array,&winaddr);CHKERRMPI(ierr);
    ierr = PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);CHKERRQ(ierr);
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    ierr = MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRMPI(ierr);
    update = PETSC_TRUE; /* MPI owns the memory; copy the user's roots in below */
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    ierr = MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRMPI(ierr);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
  }
  ierr = PetscInfo3(sf,"New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
    PetscMPIInt rank;

    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);CHKERRMPI(ierr);}
    ierr = PetscMemcpy(link->addr,array,sf->nroots*bytes);CHKERRQ(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_unlock(rank,*win);CHKERRMPI(ierr);
      ierr = MPI_Win_fence(0,*win);CHKERRMPI(ierr);
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    /* Open the access/exposure epoch with the requested synchronization style */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRMPI(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRMPI(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRMPI(ierr);
      if (isize) {ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRMPI(ierr);}
      if (osize) {ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRMPI(ierr);}
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
551 
552 /*
553    PetscSFFindWindow - Finds a window that is already in use
554 
555    Not Collective
556 
557    Input Parameters:
558 +  sf - star forest
559 .  unit - data type
560 -  array - array with which the window is associated
561 
562    Output Parameters:
563 +  win - window
564 -  reqs - outstanding requests associated to the window
565 
566    Level: developer
567 
568 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
569 */
570 static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
571 {
572   PetscSF_Window *w = (PetscSF_Window*)sf->data;
573   PetscSFWinLink link;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   *win = MPI_WIN_NULL;
578   for (link=w->wins; link; link=link->next) {
579     if (array == link->paddr) {
580 
581       ierr = PetscInfo3(sf,"Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
582       *win = link->win;
583       *reqs = link->reqs;
584       PetscFunctionReturn(0);
585     }
586   }
587   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
588 }
589 
590 /*
591    PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
592 
593    Collective
594 
595    Input Parameters:
596 +  sf - star forest
597 .  unit - data type
598 .  array - array associated with window
599 .  sync - type of synchronization PetscSFWindowSyncType
600 .  epoch - close an epoch, must match argument to PetscSFGetWindow()
601 .  update - if we have to update the local window array
602 -  win - window
603 
604    Level: developer
605 
606 .seealso: PetscSFFindWindow()
607 */
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscErrorCode          ierr;
  PetscSFWinLink          *p,link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void*                   laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Walk the list via a pointer-to-pointer so a matched node can be unlinked in place */
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (array != link->paddr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      laddr = link->addr;
      flavor = link->flavor;
      bytes = link->bytes;
      /* CREATE-flavor windows are single-use: unlink now and free below; no copy-back needed since
         the window exposes the user's array directly */
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  ierr = PetscInfo3(sf,"Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  if (epoch) {
    /* Close the epoch with the synchronization style it was opened with */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRMPI(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=wader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRMPI(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRMPI(ierr);
      if (osize) {ierr = MPI_Win_complete(*win);CHKERRMPI(ierr);}
      if (isize) {ierr = MPI_Win_wait(*win);CHKERRMPI(ierr);}
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  if (update) {
    /* copy window-owned memory back into the user's array (ALLOCATE/SHARED flavors) */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);CHKERRMPI(ierr);
    }
    ierr = PetscMemcpy(array,laddr,sf->nroots*bytes);CHKERRQ(ierr);
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    /* single-use (CREATE) window: destroy it and report MPI_WIN_NULL to the caller */
    ierr = PetscFree(link->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(link->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&link->win);CHKERRMPI(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(0);
}
684 
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Group      ingroup,outgroup;

  PetscFunctionBegin;
  ierr = PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);CHKERRQ(ierr);
  if (!w->dynsf) {
    PetscInt    i;
    PetscSFNode *remotes;

    /* Build a one-root-per-rank SF used to exchange window base addresses for the DYNAMIC flavor;
       ownership of 'remotes' transfers to dynsf via PETSC_OWN_POINTER */
    ierr = PetscMalloc1(sf->nranks,&remotes);CHKERRQ(ierr);
    for (i=0;i<sf->nranks;i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE);CHKERRQ(ierr); /* break recursion */
    ierr = PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);CHKERRQ(ierr);
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* result intentionally unused: this call computes and caches the groups inside sf so
       later (collective) first use does not happen mid-epoch */
    ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
    /* fall through */
  default:
    break;
  }
  PetscFunctionReturn(0);
}
715 
716 static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
717 {
718   PetscSF_Window          *w = (PetscSF_Window*)sf->data;
719   PetscErrorCode          ierr;
720   PetscSFWindowFlavorType flavor = w->flavor;
721 
722   PetscFunctionBegin;
723   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");CHKERRQ(ierr);
724   ierr = PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);CHKERRQ(ierr);
725   ierr = PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);CHKERRQ(ierr);
726   ierr = PetscSFWindowSetFlavorType(sf,flavor);CHKERRQ(ierr);
727   ierr = PetscOptionsTail();CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window  *w = (PetscSF_Window*)sf->data;
  PetscErrorCode  ierr;
  PetscSFDataLink link,next;
  PetscSFWinLink  wlink,wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the datatype cache built by PetscSFWindowGetDataTypes() */
  for (link=w->link; link; link=next) {
    next = link->next;
    ierr = MPI_Type_free(&link->unit);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) {
      ierr = MPI_Type_free(&link->mine[i]);CHKERRMPI(ierr);
      ierr = MPI_Type_free(&link->remote[i]);CHKERRMPI(ierr);
    }
    ierr = PetscFree2(link->mine,link->remote);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
  }
  w->link = NULL;
  /* Free all cached windows; a window still checked out is a programming error */
  for (wlink=w->wins; wlink; wlink=wnext) {
    wnext = wlink->next;
    if (wlink->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
    ierr = PetscFree(wlink->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(wlink->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&wlink->win);CHKERRMPI(ierr);
    ierr = PetscFree(wlink);CHKERRQ(ierr);
  }
  w->wins = NULL;
  ierr = PetscSFDestroy(&w->dynsf);CHKERRQ(ierr);
  if (w->info != MPI_INFO_NULL) {
    ierr = MPI_Info_free(&w->info);CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
766 
767 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
768 {
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   ierr = PetscSFReset_Window(sf);CHKERRQ(ierr);
773   ierr = PetscFree(sf->data);CHKERRQ(ierr);
774   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);CHKERRQ(ierr);
775   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);CHKERRQ(ierr);
776   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);CHKERRQ(ierr);
777   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);CHKERRQ(ierr);
778   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
783 static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
784 {
785   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
786   PetscErrorCode    ierr;
787   PetscBool         iascii;
788   PetscViewerFormat format;
789 
790   PetscFunctionBegin;
791   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
792   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
793   if (iascii) {
794     ierr = PetscViewerASCIIPrintf(viewer,"  current flavor=%s synchronization=%s MultiSF sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
795     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
796       if (w->info != MPI_INFO_NULL) {
797         PetscMPIInt k,nkeys;
798         char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
799 
800         ierr = MPI_Info_get_nkeys(w->info,&nkeys);CHKERRMPI(ierr);
801         ierr = PetscViewerASCIIPrintf(viewer,"    current info with %d keys. Ordered key-value pairs follow:\n",nkeys);CHKERRQ(ierr);
802         for (k = 0; k < nkeys; k++) {
803           PetscMPIInt flag;
804 
805           ierr = MPI_Info_get_nthkey(w->info,k,key);CHKERRMPI(ierr);
806           ierr = MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);CHKERRMPI(ierr);
807           if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key);
808           ierr = PetscViewerASCIIPrintf(viewer,"      %s = %s\n",key,value);CHKERRQ(ierr);
809         }
810       } else {
811         ierr = PetscViewerASCIIPrintf(viewer,"    current info=MPI_INFO_NULL\n");CHKERRQ(ierr);
812       }
813     }
814   }
815   PetscFunctionReturn(0);
816 }
817 
818 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
819 {
820   PetscSF_Window        *w = (PetscSF_Window*)sf->data;
821   PetscErrorCode        ierr;
822   PetscSFWindowSyncType synctype;
823 
824   PetscFunctionBegin;
825   synctype = w->sync;
826   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
827   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
828   ierr = PetscSFWindowSetSyncType(newsf,synctype);CHKERRQ(ierr);
829   ierr = PetscSFWindowSetFlavorType(newsf,w->flavor);CHKERRQ(ierr);
830   ierr = PetscSFWindowSetInfo(newsf,w->info);CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833 
834 static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
835 {
836   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
837   PetscErrorCode     ierr;
838   PetscInt           i,nranks;
839   const PetscMPIInt  *ranks;
840   const MPI_Aint     *target_disp;
841   const MPI_Datatype *mine,*remote;
842   MPI_Request        *reqs;
843   MPI_Win            win;
844 
845   PetscFunctionBegin;
846   if (op != MPI_REPLACE) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
847   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
848   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
849   ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);CHKERRQ(ierr);
850   for (i=0; i<nranks; i++) {
851     MPI_Aint tdp = target_disp ? target_disp[i] : 0;
852 
853     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
854       ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRMPI(ierr);
855 #if defined(PETSC_HAVE_MPI_RGET)
856       ierr = MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);CHKERRMPI(ierr);
857 #else
858       ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRMPI(ierr);
859 #endif
860     } else {
861       ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRMPI(ierr);
862     }
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
868 {
869   PetscSF_Window *w = (PetscSF_Window*)sf->data;
870   PetscErrorCode ierr;
871   MPI_Win        win;
872   MPI_Request    *reqs = NULL;
873 
874   PetscFunctionBegin;
875   ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
876   if (reqs) {ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
877   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
878     PetscInt           i,nranks;
879     const PetscMPIInt  *ranks;
880 
881     ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
882     for (i=0; i<nranks; i++) {
883       ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);
884     }
885   }
886   ierr = PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
891 {
892   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
893   PetscErrorCode     ierr;
894   PetscInt           i,nranks;
895   const PetscMPIInt  *ranks;
896   const MPI_Aint     *target_disp;
897   const MPI_Datatype *mine,*remote;
898   MPI_Win            win;
899 
900   PetscFunctionBegin;
901   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
902   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
903   ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
904   ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
905   for (i=0; i<nranks; i++) {
906     MPI_Aint tdp = target_disp ? target_disp[i] : 0;
907 
908     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRMPI(ierr);}
909     ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);CHKERRMPI(ierr);
910     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);}
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
916 {
917   PetscSF_Window *w = (PetscSF_Window*)sf->data;
918   PetscErrorCode ierr;
919   MPI_Win        win;
920   MPI_Request    *reqs = NULL;
921 
922   PetscFunctionBegin;
923   ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
924   if (reqs) {ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
925   ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
/* Start a fetch-and-op: for each root rank, fetch the current root values into
   leafupdate and combine leafdata into the roots with op.  With MPI_Get_accumulate
   the fetch+combine is a single atomic call; without it the pair Get+Accumulate is
   serialized under an exclusive lock per target rank. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine,*remote;
  const MPI_Aint     *target_disp;
  MPI_Win            win;
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf; /* saved flavor, restored after the temporary CREATE window below */
#endif

  PetscFunctionBegin;
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  /* map PETSc's MPIU_* ops to built-in MPI ops usable inside accumulate calls */
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  ierr = PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#else
  ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#endif
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0; /* 0 when the flavor does not use dynamic target addresses */

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* exclusive lock keeps the Get and the Accumulate atomic with respect to other origins */
    ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);CHKERRMPI(ierr);
    ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRMPI(ierr);
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);CHKERRMPI(ierr);
    ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);
#else
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);CHKERRMPI(ierr);}
    ierr = MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win);CHKERRMPI(ierr);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);}
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  PetscFunctionReturn(0);
}
974 
975 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
976 {
977   PetscErrorCode ierr;
978   MPI_Win        win;
979 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
980   PetscSF_Window *w = (PetscSF_Window*)sf->data;
981 #endif
982   MPI_Request    *reqs = NULL;
983 
984   PetscFunctionBegin;
985   ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
986   if (reqs) {ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
987 #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
988   ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
989 #else
990   ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);CHKERRQ(ierr);
991 #endif
992   PetscFunctionReturn(0);
993 }
994 
995 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
996 {
997   PetscSF_Window *w = (PetscSF_Window*)sf->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   sf->ops->SetUp           = PetscSFSetUp_Window;
1002   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
1003   sf->ops->Reset           = PetscSFReset_Window;
1004   sf->ops->Destroy         = PetscSFDestroy_Window;
1005   sf->ops->View            = PetscSFView_Window;
1006   sf->ops->Duplicate       = PetscSFDuplicate_Window;
1007   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
1008   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
1009   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
1010   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
1011   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1012   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
1013 
1014   ierr = PetscNewLog(sf,&w);CHKERRQ(ierr);
1015   sf->data  = (void*)w;
1016   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
1017   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1018   w->info   = MPI_INFO_NULL;
1019 
1020   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);CHKERRQ(ierr);
1021   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);CHKERRQ(ierr);
1022   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);CHKERRQ(ierr);
1023   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);CHKERRQ(ierr);
1024   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);CHKERRQ(ierr);
1025   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);CHKERRQ(ierr);
1026 
1027 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
1028   {
1029     PetscBool ackbug = PETSC_FALSE;
1030     ierr = PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);CHKERRQ(ierr);
1031     if (ackbug) {
1032       ierr = PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");CHKERRQ(ierr);
1033     } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1034   }
1035 #endif
1036   PetscFunctionReturn(0);
1037 }
1038