xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision b698fc57f0bea7237255b29c1b77df0acc362ffd)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
typedef struct _n_PetscSFDataLink *PetscSFDataLink; /* cache node: composite MPI datatypes built from one unit type */
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;  /* cache node: one MPI window and its bookkeeping state */

/* Implementation data for a PetscSF of type "window" (MPI one-sided communication) */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* helper SF used to broadcast root buffer addresses (DYNAMIC flavor) */
  MPI_Info                info;   /* info handle passed to window creation; MPI_INFO_NULL unless set */
} PetscSF_Window;

/* One cached set of per-rank composite datatypes for a given unit type; see PetscSFWindowGetDataTypes() */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;    /* duplicated unit type this entry was built for */
  MPI_Datatype    *mine;   /* per-root-rank indexed type into the local leaf buffer */
  MPI_Datatype    *remote; /* per-root-rank indexed type into the remote root buffer */
  PetscSFDataLink next;
};

/* One cached MPI window; windows may be reused depending on the flavor (see PetscSFWindowSetFlavorType()) */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window is currently checked out via PetscSFGetWindow() */
  size_t                  bytes;           /* extent in bytes of one unit (used as reuse-match key for ALLOCATE/SHARED) */
  void                    *addr;           /* window memory: the root array, or the MPI-allocated buffer */
  void                    *paddr;          /* user's root array for the operation in flight */
  MPI_Win                 win;
  MPI_Request             *reqs;           /* per-rank requests, allocated only for PETSCSF_WINDOW_SYNC_LOCK */
  PetscSFWindowFlavorType flavor;          /* flavor this window was created with */
  MPI_Aint                *dyn_target_addr; /* per-rank remote base displacements, DYNAMIC flavor only */
  PetscBool               epoch;           /* an access/exposure epoch is currently open */
  PetscSFWinLink          next;
};

/* Text forms of the enums, used for options processing and viewers */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPIU_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40 {
41   PetscFunctionBegin;
42   if (*op == MPIU_SUM) *op = MPI_SUM;
43   else if (*op == MPIU_MAX) *op = MPI_MAX;
44   else if (*op == MPIU_MIN) *op = MPI_MIN;
45   PetscFunctionReturn(0);
46 }
47 
48 /*@C
49    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
50 
51    Not Collective
52 
53    Input Arguments:
54 +  sf - star forest
55 -  unit - data type for each node
56 
57    Output Arguments:
58 +  localtypes - types describing part of local leaf buffer referencing each remote rank
59 -  remotetypes - types describing part of remote root buffer referenced for each remote rank
60 
61    Level: developer
62 
63 .seealso: PetscSFSetGraph(), PetscSFView()
64 @*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window*)sf->data;
  PetscErrorCode    ierr;
  PetscSFDataLink   link;
  PetscInt          i,nranks;
  const PetscInt    *roffset,*rmine,*rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link=w->link; link; link=link->next) {
    PetscBool match;
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(0);
    }
  }

  /* Create new composite types for each send rank */
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
  ierr = PetscNew(&link);CHKERRQ(ierr);
  ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr);
  ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    PetscInt    rcount = roffset[i+1] - roffset[i];
    /* NOTE(review): these intentionally shadow the outer const PetscInt pointers above;
       MPI_Type_create_indexed_block needs PetscMPIInt index arrays */
    PetscMPIInt *rmine,*rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt == PetscMPIInt here, so the SF's index arrays can be used directly */
    rmine   = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* 64-bit indices must be narrowed (with overflow check) into temporary MPI-int arrays */
    PetscInt j;
    ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
    for (j=0; j<rcount; j++) {
      ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
      ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
    }
#endif

    ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr);
    ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr);
#if defined(PETSC_USE_64BIT_INDICES)
    ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
#endif
    ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr);
  }
  /* Prepend the new entry to the cache so subsequent calls with the same unit hit the fast path */
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(0);
}
121 
122 /*@C
123    PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation
124 
125    Logically Collective
126 
127    Input Arguments:
128 +  sf - star forest for communication
129 -  flavor - flavor type
130 
131    Options Database Key:
132 .  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)
133 
134    Level: advanced
135 
   Notes: Window reuse follows these rules:
137 
138      PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
139 
140      PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
141        for i=1 to K
142          PetscSFOperationBegin(rootdata1,leafdata_whatever);
143          PetscSFOperationEnd(rootdata1,leafdata_whatever);
144          ...
145          PetscSFOperationBegin(rootdataN,leafdata_whatever);
146          PetscSFOperationEnd(rootdataN,leafdata_whatever);
147        endfor
148        The following pattern will instead raise an error
149          PetscSFOperationBegin(rootdata1,leafdata_whatever);
150          PetscSFOperationEnd(rootdata1,leafdata_whatever);
151          PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
152          PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
153 
     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window that fits the data and is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, with the same reuse policy as PETSCSF_WINDOW_FLAVOR_ALLOCATE
157 
158 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType()
159 @*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,flavor,2);
  /* TryMethod: silently a no-op if sf is not of the window type */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
170 
171 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
172 {
173   PetscSF_Window *w = (PetscSF_Window*)sf->data;
174 
175   PetscFunctionBegin;
176   w->flavor = flavor;
177   PetscFunctionReturn(0);
178 }
179 
180 /*@C
181    PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication
182 
183    Logically Collective
184 
185    Input Argument:
186 .  sf - star forest for communication
187 
188    Output Argument:
189 .  flavor - flavor type
190 
191    Level: advanced
192 
193 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType()
194 @*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(flavor,2);
  /* UseMethod: errors if sf is not of the window type */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205 
206 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
207 {
208   PetscSF_Window *w = (PetscSF_Window*)sf->data;
209 
210   PetscFunctionBegin;
211   *flavor = w->flavor;
212   PetscFunctionReturn(0);
213 }
214 
215 /*@C
216    PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication
217 
218    Logically Collective
219 
220    Input Arguments:
221 +  sf - star forest for communication
222 -  sync - synchronization type
223 
224    Options Database Key:
225 .  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
226 
227    Level: advanced
228 
229 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
230 @*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,sync,2);
  /* TryMethod: silently a no-op if sf is not of the window type */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
242 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
243 {
244   PetscSF_Window *w = (PetscSF_Window*)sf->data;
245 
246   PetscFunctionBegin;
247   w->sync = sync;
248   PetscFunctionReturn(0);
249 }
250 
251 /*@C
252    PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication
253 
254    Logically Collective
255 
256    Input Argument:
257 .  sf - star forest for communication
258 
259    Output Argument:
260 .  sync - synchronization type
261 
262    Level: advanced
263 
264 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
265 @*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(sync,2);
  /* UseMethod: errors if sf is not of the window type */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
276 
277 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
278 {
279   PetscSF_Window *w = (PetscSF_Window*)sf->data;
280 
281   PetscFunctionBegin;
282   *sync = w->sync;
283   PetscFunctionReturn(0);
284 }
285 
286 /*@C
287    PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation
288 
289    Logically Collective
290 
291    Input Argument:
292 +  sf - star forest for communication
293 -  info - MPI_Info handle
294 
295    Level: advanced
296 
297    Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.
298 
299 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo()
300 @*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* TryMethod: silently a no-op if sf is not of the window type */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
310 
311 static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
312 {
313   PetscSF_Window *w = (PetscSF_Window*)sf->data;
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   if (w->info != MPI_INFO_NULL) {
318     ierr = MPI_Info_free(&w->info);CHKERRQ(ierr);
319   }
320   if (info != MPI_INFO_NULL) {
321     ierr = MPI_Info_dup(info,&w->info);CHKERRQ(ierr);
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 /*@C
327    PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation
328 
329    Logically Collective
330 
331    Input Argument:
332 .  sf - star forest for communication
333 
334    Output Argument:
335 .  info - MPI_Info handle
336 
337    Level: advanced
338 
   Notes: if PetscSFWindowSetInfo() has not been called, this returns MPI_INFO_NULL
340 
341 .seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo()
342 @*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(info,2);
  /* UseMethod: errors if sf is not of the window type */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
353 
354 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
355 {
356   PetscSF_Window *w = (PetscSF_Window*)sf->data;
357 
358   PetscFunctionBegin;
359   *info = w->info;
360   PetscFunctionReturn(0);
361 }
362 
363 /*
364    PetscSFGetWindow - Get a window for use with a given data type
365 
366    Collective on PetscSF
367 
368    Input Arguments:
369 +  sf - star forest
370 .  unit - data type
371 .  array - array to be sent
372 .  sync - type of synchronization PetscSFWindowSyncType
373 .  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
374 .  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
375 .  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
376 -  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
377 
   Output Arguments:
+  target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
.  reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
-  win - window
382 
383    Level: developer
384 .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
385 */
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Aint       lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;
  MPI_Aint       winaddr;
  PetscInt       nranks;
  PetscBool      reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool      dummy[2];
  MPI_Aint       wsize;

  PetscFunctionBegin;
  /* Only "simple" unit types are supported: zero lower bound and extent equal to true extent */
  ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRQ(ierr);
  ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRQ(ierr);
  if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* Every flavor except CREATE may reuse a cached window; search the list below */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link=w->wins; reuse && link; link=link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* In debug mode, verify collectively that every process matched (LAND) iff some process matched (LOR) */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy  ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
          if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        if (link->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use");
        if (epoch && link->epoch) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished");
        winok = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* This process did not match; participate in the same two reductions so the debug check stays collective */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy  ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
        if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update = PETSC_TRUE; /* window owns its own buffer: copy the user's root data in below */
        link->paddr = array;
        winok = PETSC_TRUE;
      }
      break;
    default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      ierr = PetscInfo3(sf,"Reusing window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
      goto found;
    }
  }

  /* No reusable window: create a new link and window of the requested flavor */
  wsize = (MPI_Aint)bytes*sf->nroots;
  ierr = PetscNew(&link);CHKERRQ(ierr);
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    /* LOCK synchronization tracks one outstanding request per root rank */
    ierr = PetscMalloc1(sf->nranks,&link->reqs);CHKERRQ(ierr);
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    /* Window exposes the user's array directly; never cached for reuse */
    ierr = MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr);
    link->addr  = array;
    link->paddr = array;
    break;
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    ierr = MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr);
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    ierr = MPI_Win_attach(link->win,wsize ? array : &ierr,wsize);CHKERRQ(ierr);
#else
    ierr = MPI_Win_attach(link->win,array,wsize);CHKERRQ(ierr);
#endif
    link->addr  = array;
    link->paddr = array;
    if (!w->dynsf) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()");
    /* Dynamic windows need the remote base addresses as target displacements: gather each
       rank's attached address and broadcast it to the leaves via the helper SF */
    ierr = PetscSFSetUp(w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
    ierr = PetscMalloc1(nranks,&link->dyn_target_addr);CHKERRQ(ierr);
    ierr = MPI_Get_address(array,&winaddr);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr);CHKERRQ(ierr);
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    /* MPI allocates the window buffer; the user's data is copied in below (update = PETSC_TRUE) */
    ierr = MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRQ(ierr);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    ierr = MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRQ(ierr);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
  }
  ierr = PetscInfo3(sf,"New window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
    PetscMPIInt rank;

    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);CHKERRQ(ierr); }
    ierr = PetscMemcpy(link->addr,array,sf->nroots*bytes);CHKERRQ(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_unlock(rank,*win);CHKERRQ(ierr);
      ierr = MPI_Win_fence(0,*win);CHKERRQ(ierr);
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    /* Open the access/exposure epoch according to the chosen synchronization style */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRQ(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRQ(ierr);
      if (isize) { ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRQ(ierr); }
      if (osize) { ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRQ(ierr); }
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
547 
548 /*
549    PetscSFFindWindow - Finds a window that is already in use
550 
551    Not Collective
552 
553    Input Arguments:
554 +  sf - star forest
555 .  unit - data type
556 -  array - array with which the window is associated
557 
558    Output Arguments:
559 +  win - window
560 -  reqs - outstanding requests associated to the window
561 
562    Level: developer
563 
564 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
565 */
566 static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
567 {
568   PetscSF_Window *w = (PetscSF_Window*)sf->data;
569   PetscSFWinLink link;
570   PetscErrorCode ierr;
571 
572   PetscFunctionBegin;
573   *win = MPI_WIN_NULL;
574   for (link=w->wins; link; link=link->next) {
575     if (array == link->paddr) {
576       ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
577       *win = link->win;
578       *reqs = link->reqs;
579       PetscFunctionReturn(0);
580     }
581   }
582   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
583 }
584 
585 /*
586    PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
587 
588    Collective
589 
590    Input Arguments:
591 +  sf - star forest
592 .  unit - data type
593 .  array - array associated with window
594 .  sync - type of synchronization PetscSFWindowSyncType
595 .  epoch - close an epoch, must match argument to PetscSFGetWindow()
596 .  update - if we have to update the local window array
597 -  win - window
598 
599    Level: developer
600 
601 .seealso: PetscSFFindWindow()
602 */
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscErrorCode          ierr;
  PetscSFWinLink          *p,link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void*                   laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Walk the cache with a pointer-to-pointer so a CREATE-flavor link can be unlinked in place */
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (array != link->paddr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      laddr = link->addr;
      flavor = link->flavor;
      bytes = link->bytes;
      /* CREATE windows expose user memory directly: never reused, and nothing to copy back */
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  if (epoch) {
    /* Close the access/exposure epoch, mirroring the opening logic in PetscSFGetWindow() */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRQ(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRQ(ierr);
      if (osize) { ierr = MPI_Win_complete(*win);CHKERRQ(ierr); }
      if (isize) { ierr = MPI_Win_wait(*win);CHKERRQ(ierr); }
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  if (update) {
    /* Window owns its own buffer (ALLOCATE/SHARED): copy the result back into the user's array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);CHKERRQ(ierr);
    }
    ierr = PetscMemcpy(array,laddr,sf->nroots*bytes);CHKERRQ(ierr);
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    /* CREATE flavor: the link was already unlinked above; destroy the window and free the node */
    ierr = PetscFree(link->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(link->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&link->win);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(0);
}
679 
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Group      ingroup,outgroup;

  PetscFunctionBegin;
  ierr = PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);CHKERRQ(ierr);
  if (!w->dynsf) {
    PetscInt    i;
    PetscSFNode *remotes;

    /* Build a helper SF with one root per remote rank, used to broadcast window
       base addresses for the DYNAMIC flavor (see PetscSFGetWindow()) */
    ierr = PetscMalloc1(sf->nranks,&remotes);CHKERRQ(ierr);
    for (i=0;i<sf->nranks;i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE);CHKERRQ(ierr); /* break recursion */
    ierr = PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);CHKERRQ(ierr);
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* NOTE(review): presumably forces the in/out group construction to happen here,
       during collective setup, rather than lazily inside Begin/End — confirm */
    ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
    /* fallthrough */
  default:
    break;
  }
  PetscFunctionReturn(0);
}
710 
static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscErrorCode          ierr;
  PetscSFWindowFlavorType flavor = w->flavor;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");CHKERRQ(ierr);
  ierr = PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);CHKERRQ(ierr);
  /* Apply the flavor through the setter rather than writing w->flavor directly */
  ierr = PetscSFWindowSetFlavorType(sf,flavor);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
725 
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window  *w = (PetscSF_Window*)sf->data;
  PetscErrorCode  ierr;
  PetscSFDataLink link,next;
  PetscSFWinLink  wlink,wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the cached composite datatypes built by PetscSFWindowGetDataTypes() */
  for (link=w->link; link; link=next) {
    next = link->next;
    ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      ierr = MPI_Type_free(&link->mine[i]);CHKERRQ(ierr);
      ierr = MPI_Type_free(&link->remote[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree2(link->mine,link->remote);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
  }
  w->link = NULL;
  /* Destroy all cached windows; it is an error to reset while an operation is in flight */
  for (wlink=w->wins; wlink; wlink=wnext) {
    wnext = wlink->next;
    if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
    ierr = PetscFree(wlink->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(wlink->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&wlink->win);CHKERRQ(ierr);
    ierr = PetscFree(wlink);CHKERRQ(ierr);
  }
  w->wins = NULL;
  ierr = PetscSFDestroy(&w->dynsf);CHKERRQ(ierr);
  if (w->info != MPI_INFO_NULL) {
    ierr = MPI_Info_free(&w->info);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
761 
762 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
763 {
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = PetscSFReset_Window(sf);CHKERRQ(ierr);
768   ierr = PetscFree(sf->data);CHKERRQ(ierr);
769   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);CHKERRQ(ierr);
770   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);CHKERRQ(ierr);
771   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);CHKERRQ(ierr);
772   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);CHKERRQ(ierr);
773   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);CHKERRQ(ierr);
774   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 
778 static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
779 {
780   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
781   PetscErrorCode    ierr;
782   PetscBool         iascii;
783   PetscViewerFormat format;
784 
785   PetscFunctionBegin;
786   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
787   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
788   if (iascii) {
789     ierr = PetscViewerASCIIPrintf(viewer,"  current flavor=%s synchronization=%s sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
790     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
791       if (w->info != MPI_INFO_NULL) {
792         PetscMPIInt k,nkeys;
793         char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
794 
795         ierr = MPI_Info_get_nkeys(w->info,&nkeys);CHKERRQ(ierr);
796         ierr = PetscViewerASCIIPrintf(viewer,"    current info with %d keys. Ordered key-value pairs follow:\n",nkeys);CHKERRQ(ierr);
797         for (k = 0; k < nkeys; k++) {
798           PetscMPIInt flag;
799 
800           ierr = MPI_Info_get_nthkey(w->info,k,key);CHKERRQ(ierr);
801           ierr = MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);CHKERRQ(ierr);
802           if (!flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key);
803           ierr = PetscViewerASCIIPrintf(viewer,"      %s = %s\n",key,value);CHKERRQ(ierr);
804         }
805       } else {
806         ierr = PetscViewerASCIIPrintf(viewer,"    current info=MPI_INFO_NULL\n");CHKERRQ(ierr);
807       }
808     }
809   }
810   PetscFunctionReturn(0);
811 }
812 
813 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
814 {
815   PetscSF_Window        *w = (PetscSF_Window*)sf->data;
816   PetscErrorCode        ierr;
817   PetscSFWindowSyncType synctype;
818 
819   PetscFunctionBegin;
820   synctype = w->sync;
821   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
822   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
823   ierr = PetscSFWindowSetSyncType(newsf,synctype);CHKERRQ(ierr);
824   ierr = PetscSFWindowSetFlavorType(newsf,w->flavor);CHKERRQ(ierr);
825   ierr = PetscSFWindowSetInfo(newsf,w->info);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 static PetscErrorCode PetscSFBcastAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
830 {
831   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
832   PetscErrorCode     ierr;
833   PetscInt           i,nranks;
834   const PetscMPIInt  *ranks;
835   const MPI_Aint     *target_disp;
836   const MPI_Datatype *mine,*remote;
837   MPI_Request        *reqs;
838   MPI_Win            win;
839 
840   PetscFunctionBegin;
841   if (op != MPI_REPLACE || op != MPIU_REPLACE) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastAndOpBegin_Window with op!=MPI_REPLACE has not been implemented");
842   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
843   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
844   ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);CHKERRQ(ierr);
845   for (i=0; i<nranks; i++) {
846     MPI_Aint tdp = target_disp ? target_disp[i] : 0;
847 
848     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
849       ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);
850 #if defined(PETSC_HAVE_MPI_RGET)
851       ierr = MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);CHKERRQ(ierr);
852 #else
853       ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr);
854 #endif
855     } else {
856       ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr);
857     }
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode PetscSFBcastAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
863 {
864   PetscSF_Window *w = (PetscSF_Window*)sf->data;
865   PetscErrorCode ierr;
866   MPI_Win        win;
867   MPI_Request    *reqs = NULL;
868 
869   PetscFunctionBegin;
870   ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
871   if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); }
872   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
873     PetscInt           i,nranks;
874     const PetscMPIInt  *ranks;
875 
876     ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
877     for (i=0; i<nranks; i++) {
878       ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);
879     }
880   }
881   ierr = PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);CHKERRQ(ierr);
882   PetscFunctionReturn(0);
883 }
884 
/* Start a leaf-to-root reduction: each process MPI_Accumulates its leaf values
   into a window exposing rootdata on the owning ranks. The MPIU_ op is first
   translated to its built-in MPI_ counterpart, since built-in ops are required
   for composite unit types inside MPI_Accumulate (see PetscSFWindowOpTranslate). */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine,*remote;
  MPI_Win            win;

  PetscFunctionBegin;
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
  /* Open an epoch on a window exposing rootdata; target_disp may be NULL, then displacement 0 is used */
  ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);}
    /* Deliberately NOT wrapped in CHKERRQ: the raw return code is inspected so an
       unsupported unit/op combination produces a readable MPI error string below */
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */
      PetscMPIInt len;
      char        errstring[MPI_MAX_ERROR_STRING];

      MPI_Error_string(ierr,errstring,&len);
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring);
    }
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);}
  }
  PetscFunctionReturn(0);
}
916 
917 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
918 {
919   PetscSF_Window *w = (PetscSF_Window*)sf->data;
920   PetscErrorCode ierr;
921   MPI_Win        win;
922   MPI_Request    *reqs = NULL;
923 
924   PetscFunctionBegin;
925   ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
926   if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); }
927   ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
928   PetscFunctionReturn(0);
929 }
930 
/* Start a fetch-and-op: for each root rank, fetch the current root values into
   leafupdate and combine leafdata into the root window with op.
   With MPI_Get_accumulate both steps happen in one atomic RMA call; without it
   the pair is emulated by separate MPI_Get + MPI_Accumulate under an exclusive
   lock, which forces LOCK synchronization and a freshly created window. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine,*remote;
  const MPI_Aint     *target_disp;
  MPI_Win            win;
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf; /* saved flavor, restored after the temporary CREATE window */
#endif

  PetscFunctionBegin;
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  /* Map MPIU_ built-in ops to plain MPI_ ops usable inside RMA accumulate calls */
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  ierr = PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#else
  ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#endif
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0; /* target_disp may be NULL, then displacement 0 is used */

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* Exclusive lock makes the Get + Accumulate pair atomic with respect to other origins */
    ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);CHKERRQ(ierr);
    ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRQ(ierr);
    /* Return code checked by hand (no CHKERRQ) so an unsupported unit/op combination yields a readable error */
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */
      PetscMPIInt len;
      char        errstring[MPI_MAX_ERROR_STRING];

      MPI_Error_string(ierr,errstring,&len);
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring);
    }
    ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);
#else
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);CHKERRQ(ierr); }
    /* Atomic read-and-combine in a single call; return code checked by hand as above */
    ierr = MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (ierr) { /* intercept the MPI error since the combination of unit and op is not supported */
      PetscMPIInt len;
      char        errstring[MPI_MAX_ERROR_STRING];

      MPI_Error_string(ierr,errstring,&len);
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Internal error in MPI: %s",errstring);
    }
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) { ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr); }
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  PetscFunctionReturn(0);
}
990 
/* Complete a fetch-and-op started by PetscSFFetchAndOpBegin_Window(): wait on
   any outstanding requests, then restore the window. The restore arguments must
   mirror the Begin-side epoch for each compile path: the user-selected sync with
   an active epoch when MPI_Get_accumulate was used, or LOCK sync with no epoch
   for the emulated (lock-per-rank) path. */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  MPI_Win        win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
#endif
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  /* Locate the window opened at Begin time for this (unit,rootdata) pair */
  ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
  if (reqs) { ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); }
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
#else
  ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
1010 
1011 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
1012 {
1013   PetscSF_Window *w = (PetscSF_Window*)sf->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   sf->ops->SetUp           = PetscSFSetUp_Window;
1018   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
1019   sf->ops->Reset           = PetscSFReset_Window;
1020   sf->ops->Destroy         = PetscSFDestroy_Window;
1021   sf->ops->View            = PetscSFView_Window;
1022   sf->ops->Duplicate       = PetscSFDuplicate_Window;
1023   sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Window;
1024   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Window;
1025   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
1026   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
1027   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1028   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
1029 
1030   ierr = PetscNewLog(sf,&w);CHKERRQ(ierr);
1031   sf->data  = (void*)w;
1032   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
1033   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1034   w->info   = MPI_INFO_NULL;
1035 
1036   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);CHKERRQ(ierr);
1037   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);CHKERRQ(ierr);
1038   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);CHKERRQ(ierr);
1039   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);CHKERRQ(ierr);
1040   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);CHKERRQ(ierr);
1041   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);CHKERRQ(ierr);
1042 
1043 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
1044   {
1045     PetscBool ackbug = PETSC_FALSE;
1046     ierr = PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);CHKERRQ(ierr);
1047     if (ackbug) {
1048       ierr = PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");CHKERRQ(ierr);
1049     } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1050   }
1051 #endif
1052   PetscFunctionReturn(0);
1053 }
1054