xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision 2da392cc7c10228af19ad9843ce5155178acb644)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
/* Opaque handles for the two linked lists kept in the window context:
   cached composite MPI datatypes and cached/active MPI windows */
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;
5 
/* Implementation data attached to a PetscSF of type "window" (sf->data) */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* helper SF used to broadcast window base addresses (DYNAMIC flavor) */
  MPI_Info                info;   /* info handle passed to window creation calls; MPI_INFO_NULL unless set */
} PetscSF_Window;
14 
/* One cache entry of composite per-rank datatypes built for a given unit type */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;   /* duplicated handle of the unit type this entry was built for */
  MPI_Datatype    *mine;   /* per root-rank indexed types into the local leaf buffer */
  MPI_Datatype    *remote; /* per root-rank indexed types into the remote root buffer */
  PetscSFDataLink next;   /* next cache entry */
};
21 
/* One MPI window plus the bookkeeping needed to reuse it across operations */
struct _n_PetscSFWinLink {
  PetscBool               inuse;  /* window currently handed out by PetscSFGetWindow() */
  size_t                  bytes;  /* extent in bytes of one unit (window displacement unit) */
  void                    *addr;  /* memory actually exposed through the window */
  void                    *paddr; /* user array currently associated with the window */
  MPI_Win                 win;    /* the MPI window handle */
  MPI_Request             *reqs;  /* per-rank requests, allocated for SYNC_LOCK only */
  PetscSFWindowFlavorType flavor; /* flavor the window was created with */
  MPI_Aint                *dyn_target_addr; /* per-rank target displacements (DYNAMIC flavor only) */
  PetscBool               epoch;  /* an access epoch is currently open on this window */
  PetscSFWinLink          next;   /* next window in the list */
};
34 
/* Enum-name tables; trailing entries follow the PetscEnum convention (type name, value prefix, NULL sentinel) */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPIU_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40 {
41   PetscFunctionBegin;
42   if (*op == MPIU_SUM) *op = MPI_SUM;
43   else if (*op == MPIU_MAX) *op = MPI_MAX;
44   else if (*op == MPIU_MIN) *op = MPI_MIN;
45   PetscFunctionReturn(0);
46 }
47 
48 /*@C
49    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
50 
51    Not Collective
52 
53    Input Arguments:
54 +  sf - star forest
55 -  unit - data type for each node
56 
57    Output Arguments:
58 +  localtypes - types describing part of local leaf buffer referencing each remote rank
59 -  remotetypes - types describing part of remote root buffer referenced for each remote rank
60 
61    Level: developer
62 
63 .seealso: PetscSFSetGraph(), PetscSFView()
64 @*/
65 static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
66 {
67   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
68   PetscErrorCode    ierr;
69   PetscSFDataLink   link;
70   PetscInt          i,nranks;
71   const PetscInt    *roffset,*rmine,*rremote;
72   const PetscMPIInt *ranks;
73 
74   PetscFunctionBegin;
75   /* Look for types in cache */
76   for (link=w->link; link; link=link->next) {
77     PetscBool match;
78     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
79     if (match) {
80       *localtypes  = link->mine;
81       *remotetypes = link->remote;
82       PetscFunctionReturn(0);
83     }
84   }
85 
86   /* Create new composite types for each send rank */
87   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
88   ierr = PetscNew(&link);CHKERRQ(ierr);
89   ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr);
90   ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
91   for (i=0; i<nranks; i++) {
92     PetscInt    rcount = roffset[i+1] - roffset[i];
93     PetscMPIInt *rmine,*rremote;
94 #if !defined(PETSC_USE_64BIT_INDICES)
95     rmine   = sf->rmine + sf->roffset[i];
96     rremote = sf->rremote + sf->roffset[i];
97 #else
98     PetscInt j;
99     ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
100     for (j=0; j<rcount; j++) {
101       ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
102       ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
103     }
104 #endif
105 
106     ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr);
107     ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr);
108 #if defined(PETSC_USE_64BIT_INDICES)
109     ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
110 #endif
111     ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr);
112     ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr);
113   }
114   link->next = w->link;
115   w->link    = link;
116 
117   *localtypes  = link->mine;
118   *remotetypes = link->remote;
119   PetscFunctionReturn(0);
120 }
121 
122 /*@C
123    PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation
124 
125    Logically Collective
126 
127    Input Arguments:
128 +  sf - star forest for communication
129 -  flavor - flavor type
130 
131    Options Database Key:
132 .  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)
133 
134    Level: advanced
135 
   Notes: window reuse follows these rules:
137 
138      PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
139 
140      PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
141        for i=1 to K
142          PetscSFOperationBegin(rootdata1,leafdata_whatever);
143          PetscSFOperationEnd(rootdata1,leafdata_whatever);
144          ...
145          PetscSFOperationBegin(rootdataN,leafdata_whatever);
146          PetscSFOperationEnd(rootdataN,leafdata_whatever);
147        endfor
148        The following pattern will instead raise an error
149          PetscSFOperationBegin(rootdata1,leafdata_whatever);
150          PetscSFOperationEnd(rootdata1,leafdata_whatever);
151          PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
152          PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
153 
     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and is not in use
155 
     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, with the same reuse policy as PETSCSF_WINDOW_FLAVOR_ALLOCATE
157 
158 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType()
159 @*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,flavor,2);
  /* Try (not Use): SFs of other types simply ignore this call */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
170 
171 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
172 {
173   PetscSF_Window *w = (PetscSF_Window*)sf->data;
174 
175   PetscFunctionBegin;
176   w->flavor = flavor;
177   PetscFunctionReturn(0);
178 }
179 
180 /*@C
181    PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication
182 
183    Logically Collective
184 
185    Input Argument:
186 .  sf - star forest for communication
187 
188    Output Argument:
189 .  flavor - flavor type
190 
191    Level: advanced
192 
193 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType()
194 @*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(flavor,2);
  /* Use (not Try): errors if sf does not provide this method, i.e. is not a window SF */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205 
206 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
207 {
208   PetscSF_Window *w = (PetscSF_Window*)sf->data;
209 
210   PetscFunctionBegin;
211   *flavor = w->flavor;
212   PetscFunctionReturn(0);
213 }
214 
215 /*@C
216    PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication
217 
218    Logically Collective
219 
220    Input Arguments:
221 +  sf - star forest for communication
222 -  sync - synchronization type
223 
224    Options Database Key:
225 .  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
226 
227    Level: advanced
228 
229 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
230 @*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,sync,2);
  /* Try (not Use): SFs of other types simply ignore this call */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
242 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
243 {
244   PetscSF_Window *w = (PetscSF_Window*)sf->data;
245 
246   PetscFunctionBegin;
247   w->sync = sync;
248   PetscFunctionReturn(0);
249 }
250 
251 /*@C
252    PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication
253 
254    Logically Collective
255 
256    Input Argument:
257 .  sf - star forest for communication
258 
259    Output Argument:
260 .  sync - synchronization type
261 
262    Level: advanced
263 
264 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
265 @*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(sync,2);
  /* Use (not Try): errors if sf does not provide this method, i.e. is not a window SF */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
276 
277 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
278 {
279   PetscSF_Window *w = (PetscSF_Window*)sf->data;
280 
281   PetscFunctionBegin;
282   *sync = w->sync;
283   PetscFunctionReturn(0);
284 }
285 
286 /*@C
287    PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation
288 
289    Logically Collective
290 
291    Input Argument:
292 +  sf - star forest for communication
293 -  info - MPI_Info handle
294 
295    Level: advanced
296 
297    Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.
298 
299 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo()
300 @*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Try (not Use): SFs of other types simply ignore this call */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
310 
311 static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
312 {
313   PetscSF_Window *w = (PetscSF_Window*)sf->data;
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   if (w->info != MPI_INFO_NULL) {
318     ierr = MPI_Info_free(&w->info);CHKERRQ(ierr);
319   }
320   if (info != MPI_INFO_NULL) {
321     ierr = MPI_Info_dup(info,&w->info);CHKERRQ(ierr);
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 /*@C
327    PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation
328 
329    Logically Collective
330 
331    Input Argument:
332 .  sf - star forest for communication
333 
334    Output Argument:
335 .  info - MPI_Info handle
336 
337    Level: advanced
338 
339    Notes: if PetscSFWindowSetInfo() has not be called, this returns MPI_INFO_NULL
340 
341 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo()
342 @*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(info,2);
  /* Use (not Try): errors if sf does not provide this method, i.e. is not a window SF */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
353 
354 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
355 {
356   PetscSF_Window *w = (PetscSF_Window*)sf->data;
357 
358   PetscFunctionBegin;
359   *info = w->info;
360   PetscFunctionReturn(0);
361 }
362 
363 /*
364    PetscSFGetWindow - Get a window for use with a given data type
365 
366    Collective on PetscSF
367 
368    Input Arguments:
369 +  sf - star forest
370 .  unit - data type
371 .  array - array to be sent
372 .  sync - type of synchronization PetscSFWindowSyncType
373 .  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
374 .  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
375 .  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
376 -  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
377 
   Output Arguments:
+  target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
.  reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
-  win - window
382 
383    Level: developer
384 .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
385 */
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Aint       lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;
  MPI_Aint       winaddr;
  PetscInt       nranks;
  PetscBool      reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool      dummy[2];
  MPI_Aint       wsize;

  PetscFunctionBegin;
  /* Only simple unit types are supported: zero lower bound and extent equal to true extent */
  ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRQ(ierr);
  ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRQ(ierr);
  if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* CREATE windows are never cached; every other flavor may reuse an entry from w->wins */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link=w->wins; reuse && link; link=link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* dummy[0] = all ranks matched (LAND), dummy[1] = some rank matched (LOR); they must agree */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy  ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
          if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        if (link->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use");
        if (epoch && link->epoch) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished");
        winok = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* non-matching ranks participate in the same reductions so the collective is well formed */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy  ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
        if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update = PETSC_TRUE; /* window owns its buffer: user data must be copied into it below */
        link->paddr = array;
        winok = PETSC_TRUE;
      }
      break;
    default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      ierr = PetscInfo3(sf,"Reusing window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
      goto found;
    }
  }

  /* No reusable window found: create one and push it onto the list */
  wsize = (MPI_Aint)bytes*sf->nroots;
  ierr = PetscNew(&link);CHKERRQ(ierr);
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    /* one request slot per connected rank, used by the LOCK synchronization path */
    ierr = PetscMalloc1(sf->nranks,&link->reqs);CHKERRQ(ierr);
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    ierr = MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr);
    link->addr  = array;
    link->paddr = array;
    break;
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    ierr = MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr);
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    ierr = MPI_Win_attach(link->win,wsize ? array : &ierr,wsize);CHKERRQ(ierr);
#else
    ierr = MPI_Win_attach(link->win,array,wsize);CHKERRQ(ierr);
#endif
    link->addr  = array;
    link->paddr = array;
    if (!w->dynsf) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()");
    /* broadcast each attached base address through dynsf so RMA target displacements are known */
    ierr = PetscSFSetUp(w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
    ierr = PetscMalloc1(nranks,&link->dyn_target_addr);CHKERRQ(ierr);
    ierr = MPI_Get_address(array,&winaddr);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr);CHKERRQ(ierr);
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    ierr = MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRQ(ierr);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    ierr = MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRQ(ierr);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
  }
  ierr = PetscInfo3(sf,"New window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
    PetscMPIInt rank;

    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);CHKERRQ(ierr); }
    ierr = PetscMemcpy(link->addr,array,sf->nroots*bytes);CHKERRQ(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_unlock(rank,*win);CHKERRQ(ierr);
      ierr = MPI_Win_fence(0,*win);CHKERRQ(ierr);
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    /* open the access/exposure epoch according to the requested synchronization type */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRQ(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRQ(ierr);
      if (isize) { ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRQ(ierr); }
      if (osize) { ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRQ(ierr); }
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
547 
548 /*
549    PetscSFFindWindow - Finds a window that is already in use
550 
551    Not Collective
552 
553    Input Arguments:
554 +  sf - star forest
555 .  unit - data type
556 -  array - array with which the window is associated
557 
558    Output Arguments:
559 +  win - window
560 -  reqs - outstanding requests associated to the window
561 
562    Level: developer
563 
564 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
565 */
static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscSFWinLink link;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *win = MPI_WIN_NULL;
  /* Match on the user array the window was acquired with (paddr), not the exposed buffer (addr) */
  for (link=w->wins; link; link=link->next) {
    if (array == link->paddr) {
      ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
      *win = link->win;
      *reqs = link->reqs;
      PetscFunctionReturn(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
  PetscFunctionReturn(0); /* unreachable: SETERRQ returns; kept to satisfy the compiler */
}
585 
586 /*
587    PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
588 
589    Collective
590 
591    Input Arguments:
592 +  sf - star forest
593 .  unit - data type
594 .  array - array associated with window
595 .  sync - type of synchronization PetscSFWindowSyncType
596 .  epoch - close an epoch, must match argument to PetscSFGetWindow()
597 .  update - if we have to update the local window array
598 -  win - window
599 
600    Level: developer
601 
602 .seealso: PetscSFFindWindow()
603 */
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscErrorCode          ierr;
  PetscSFWinLink          *p,link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void*                   laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Locate the list entry for this window; p tracks the incoming pointer so the entry can be unlinked */
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (array != link->paddr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      laddr = link->addr;
      flavor = link->flavor;
      bytes = link->bytes;
      /* CREATE windows are destroyed on restore; other flavors stay cached for reuse */
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  if (epoch) {
    /* close the epoch opened by PetscSFGetWindow() with the matching synchronization calls */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRQ(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRQ(ierr);
      if (osize) { ierr = MPI_Win_complete(*win);CHKERRQ(ierr); }
      if (isize) { ierr = MPI_Win_wait(*win);CHKERRQ(ierr); }
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  if (update) {
    /* copy the (possibly remotely modified) window buffer back into the user's array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);CHKERRQ(ierr);
    }
    ierr = PetscMemcpy(array,laddr,sf->nroots*bytes);CHKERRQ(ierr);
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    /* CREATE flavor: tear the window down completely */
    ierr = PetscFree(link->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(link->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&link->win);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(0);
}
680 
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Group      ingroup,outgroup;

  PetscFunctionBegin;
  ierr = PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);CHKERRQ(ierr);
  if (!w->dynsf) {
    PetscInt    i;
    PetscSFNode *remotes;

    /* Build a helper SF with one root per connected rank; used by the DYNAMIC flavor
       to broadcast window base addresses (see PetscSFGetWindow) */
    ierr = PetscMalloc1(sf->nranks,&remotes);CHKERRQ(ierr);
    for (i=0;i<sf->nranks;i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE);CHKERRQ(ierr); /* break recursion */
    /* remotes ownership transfers to dynsf (PETSC_OWN_POINTER); do not free it here */
    ierr = PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);CHKERRQ(ierr);
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* request the groups up front; the returned handles are not used here */
    ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
    /* fallthrough: default case only breaks */
  default:
    break;
  }
  PetscFunctionReturn(0);
}
711 
static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscErrorCode          ierr;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");CHKERRQ(ierr);
  /* sync is written directly into the context; flavor goes through the public setter below */
  ierr = PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowSetFlavorType(sf,flavor);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
726 
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window  *w = (PetscSF_Window*)sf->data;
  PetscErrorCode  ierr;
  PetscSFDataLink link,next;
  PetscSFWinLink  wlink,wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free every cached datatype entry (one 'mine'/'remote' type per connected rank) */
  for (link=w->link; link; link=next) {
    next = link->next;
    ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      ierr = MPI_Type_free(&link->mine[i]);CHKERRQ(ierr);
      ierr = MPI_Type_free(&link->remote[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree2(link->mine,link->remote);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
  }
  w->link = NULL;
  /* Free every cached window; one still in use means unbalanced Get/Restore calls */
  for (wlink=w->wins; wlink; wlink=wnext) {
    wnext = wlink->next;
    if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
    ierr = PetscFree(wlink->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(wlink->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&wlink->win);CHKERRQ(ierr);
    ierr = PetscFree(wlink);CHKERRQ(ierr);
  }
  w->wins = NULL;
  ierr = PetscSFDestroy(&w->dynsf);CHKERRQ(ierr);
  if (w->info != MPI_INFO_NULL) {
    ierr = MPI_Info_free(&w->info);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
762 
763 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
764 {
765   PetscErrorCode ierr;
766 
767   PetscFunctionBegin;
768   ierr = PetscSFReset_Window(sf);CHKERRQ(ierr);
769   ierr = PetscFree(sf->data);CHKERRQ(ierr);
770   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);CHKERRQ(ierr);
771   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);CHKERRQ(ierr);
772   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);CHKERRQ(ierr);
773   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);CHKERRQ(ierr);
774   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);CHKERRQ(ierr);
775   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
780 {
781   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
782   PetscErrorCode    ierr;
783   PetscBool         iascii;
784   PetscViewerFormat format;
785 
786   PetscFunctionBegin;
787   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
788   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
789   if (iascii) {
790     ierr = PetscViewerASCIIPrintf(viewer,"  current flavor=%s synchronization=%s sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
791     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
792       if (w->info != MPI_INFO_NULL) {
793         PetscMPIInt k,nkeys;
794         char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
795 
796         ierr = MPI_Info_get_nkeys(w->info,&nkeys);CHKERRQ(ierr);
797         ierr = PetscViewerASCIIPrintf(viewer,"    current info with %d keys. Ordered key-value pairs follow:\n",nkeys);CHKERRQ(ierr);
798         for (k = 0; k < nkeys; k++) {
799           PetscMPIInt flag;
800 
801           ierr = MPI_Info_get_nthkey(w->info,k,key);CHKERRQ(ierr);
802           ierr = MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);CHKERRQ(ierr);
803           if (!flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key);
804           ierr = PetscViewerASCIIPrintf(viewer,"      %s = %s\n",key,value);CHKERRQ(ierr);
805         }
806       } else {
807         ierr = PetscViewerASCIIPrintf(viewer,"    current info=MPI_INFO_NULL\n");CHKERRQ(ierr);
808       }
809     }
810   }
811   PetscFunctionReturn(0);
812 }
813 
814 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
815 {
816   PetscSF_Window        *w = (PetscSF_Window*)sf->data;
817   PetscErrorCode        ierr;
818   PetscSFWindowSyncType synctype;
819 
820   PetscFunctionBegin;
821   synctype = w->sync;
822   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
823   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
824   ierr = PetscSFWindowSetSyncType(newsf,synctype);CHKERRQ(ierr);
825   ierr = PetscSFWindowSetFlavorType(newsf,w->flavor);CHKERRQ(ierr);
826   ierr = PetscSFWindowSetInfo(newsf,w->info);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 static PetscErrorCode PetscSFBcastAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
831 {
832   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
833   PetscErrorCode     ierr;
834   PetscInt           i,nranks;
835   const PetscMPIInt  *ranks;
836   const MPI_Aint     *target_disp;
837   const MPI_Datatype *mine,*remote;
838   MPI_Request        *reqs;
839   MPI_Win            win;
840 
841   PetscFunctionBegin;
842   if (op != MPI_REPLACE || op != MPIU_REPLACE) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastAndOpBegin_Window with op!=MPI_REPLACE has not been implemented");
843   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
844   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
845   ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);CHKERRQ(ierr);
846   for (i=0; i<nranks; i++) {
847     MPI_Aint tdp = target_disp ? target_disp[i] : 0;
848 
849     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
850       ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);
851 #if defined(PETSC_HAVE_MPI_RGET)
852       ierr = MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);CHKERRQ(ierr);
853 #else
854       ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr);
855 #endif
856     } else {
857       ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr);
858     }
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode PetscSFBcastAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
864 {
865   PetscSF_Window *w = (PetscSF_Window*)sf->data;
866   PetscErrorCode ierr;
867   MPI_Win        win;
868   MPI_Request    *reqs = NULL;
869 
870   PetscFunctionBegin;
871   ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
872   if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); }
873   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
874     PetscInt           i,nranks;
875     const PetscMPIInt  *ranks;
876 
877     ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
878     for (i=0; i<nranks; i++) {
879       ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);
880     }
881   }
882   ierr = PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
/* Begin a reduction of leaf data onto roots using one-sided MPI_Accumulate.
   The op is first translated from the PETSc MPIU_* elementwise ops to the
   corresponding built-in MPI_Op; the epoch is completed in
   PetscSFReduceEnd_Window(). */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine,*remote;
  MPI_Win            win;

  PetscFunctionBegin;
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  /* MPIU_SUM/MAX/MIN -> MPI_SUM/MAX/MIN; built-in ops are required by MPI_Accumulate */
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
  ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    /* dynamic-flavor windows carry a per-rank target displacement; 0 otherwise */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);}
    /* no CHKERRQ here on purpose: the return code is inspected manually below */
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */
      PetscMPIInt len;
      char        errstring[MPI_MAX_ERROR_STRING];

      MPI_Error_string(ierr,errstring,&len);
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring);
    }
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);}
  }
  PetscFunctionReturn(0);
}
917 
918 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
919 {
920   PetscSF_Window *w = (PetscSF_Window*)sf->data;
921   PetscErrorCode ierr;
922   MPI_Win        win;
923   MPI_Request    *reqs = NULL;
924 
925   PetscFunctionBegin;
926   ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
927   if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); }
928   ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
931 
/* Begin an atomic fetch-and-op: leafupdate receives the previous root values
   while leafdata is combined into the roots with op.  With
   MPI_Get_accumulate both steps are a single atomic call; without it the
   get/accumulate pair is made atomic by an exclusive lock per target rank. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine,*remote;
  const MPI_Aint     *target_disp;
  MPI_Win            win;
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf; /* saved flavor, restored at the end */
#endif

  PetscFunctionBegin;
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  /* MPIU_SUM/MAX/MIN -> built-in MPI ops usable with one-sided accumulate */
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  ierr = PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#else
  ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#endif
  for (i=0; i<nranks; i++) {
    /* dynamic-flavor windows carry a per-rank target displacement; 0 otherwise */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* exclusive lock makes the get + accumulate pair atomic w.r.t. other origins */
    ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);CHKERRQ(ierr);
    ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr);
    /* no CHKERRQ here on purpose: the return code is inspected manually below */
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */
      PetscMPIInt len;
      char        errstring[MPI_MAX_ERROR_STRING];

      MPI_Error_string(ierr,errstring,&len);
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring);
    }
    ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);
#else
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);CHKERRQ(ierr); }
    /* single atomic fetch-and-op; return code inspected manually below */
    ierr = MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */
      PetscMPIInt len;
      char        errstring[MPI_MAX_ERROR_STRING];

      MPI_Error_string(ierr,errstring,&len);
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring);
    }
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr); }
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  PetscFunctionReturn(0);
}
991 
/* Complete the fetch-and-op started by PetscSFFetchAndOpBegin_Window(); the
   restore arguments mirror how the window was acquired in the Begin routine
   (configured sync with MPI_Get_accumulate, forced LOCK sync without it). */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  MPI_Win        win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
#endif
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  /* locate the window opened by the matching Begin call for (unit,rootdata) */
  ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
  if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); }
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
#else
  ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
1011 
1012 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
1013 {
1014   PetscSF_Window *w = (PetscSF_Window*)sf->data;
1015   PetscErrorCode ierr;
1016 
1017   PetscFunctionBegin;
1018   sf->ops->SetUp           = PetscSFSetUp_Window;
1019   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
1020   sf->ops->Reset           = PetscSFReset_Window;
1021   sf->ops->Destroy         = PetscSFDestroy_Window;
1022   sf->ops->View            = PetscSFView_Window;
1023   sf->ops->Duplicate       = PetscSFDuplicate_Window;
1024   sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Window;
1025   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Window;
1026   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
1027   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
1028   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1029   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
1030 
1031   ierr = PetscNewLog(sf,&w);CHKERRQ(ierr);
1032   sf->data  = (void*)w;
1033   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
1034   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1035   w->info   = MPI_INFO_NULL;
1036 
1037   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);CHKERRQ(ierr);
1038   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);CHKERRQ(ierr);
1039   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);CHKERRQ(ierr);
1040   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);CHKERRQ(ierr);
1041   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);CHKERRQ(ierr);
1042   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);CHKERRQ(ierr);
1043 
1044 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
1045   {
1046     PetscBool ackbug = PETSC_FALSE;
1047     ierr = PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);CHKERRQ(ierr);
1048     if (ackbug) {
1049       ierr = PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");CHKERRQ(ierr);
1050     } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1051   }
1052 #endif
1053   PetscFunctionReturn(0);
1054 }
1055