xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision 4e278199b78715991f5c71ebbd945c1489263e6c)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
/* Forward declarations of the two linked-list node types defined below */
typedef struct _n_PetscSFDataLink *PetscSFDataLink; /* cache entry: composite MPI datatypes for one unit type */
typedef struct _n_PetscSFWinLink  *PetscSFWinLink;  /* list entry: one MPI window and its bookkeeping */

/* Implementation data for the one-sided (MPI window) PetscSF type */
typedef struct {
  PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink          wins;   /* List of active windows */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF                 dynsf;  /* helper SF used to broadcast dynamic-window target addresses (see PetscSFGetWindow()) */
  MPI_Info                info;   /* info hints passed to window creation; MPI_INFO_NULL unless PetscSFWindowSetInfo() was called */
} PetscSF_Window;
14 
/* Cache node pairing a unit MPI datatype with the per-root-rank indexed
   datatypes built from it by PetscSFWindowGetDataTypes() */
struct _n_PetscSFDataLink {
  MPI_Datatype    unit;    /* the unit type this entry was built for (duplicated handle) */
  MPI_Datatype    *mine;   /* per-rank composite types addressing the local leaf buffer */
  MPI_Datatype    *remote; /* per-rank composite types addressing the remote root buffer */
  PetscSFDataLink next;    /* next entry in the cache list */
};
21 
/* One entry in the list of windows managed by PetscSFGetWindow()/PetscSFRestoreWindow() */
struct _n_PetscSFWinLink {
  PetscBool               inuse;           /* window currently handed out by PetscSFGetWindow() */
  size_t                  bytes;           /* extent in bytes of one unit (used for size-based reuse matching) */
  void                    *addr;           /* memory exposed by the window (user array or MPI-allocated) */
  void                    *paddr;          /* user array currently associated with the window */
  MPI_Win                 win;             /* the MPI window handle */
  MPI_Request             *reqs;           /* per-rank requests, allocated for PETSCSF_WINDOW_SYNC_LOCK only */
  PetscSFWindowFlavorType flavor;          /* flavor the window was created with (reuse requires a match) */
  MPI_Aint                *dyn_target_addr;/* per-rank target displacements, used by the DYNAMIC flavor only */
  PetscBool               epoch;           /* an access/exposure epoch is currently open on this window */
  PetscSFWinLink          next;            /* next entry in the list */
};
34 
/* Printable names for the enum values; consumed by PetscOptionsEnum() in PetscSFSetFromOptions_Window()
   and by PetscSFView_Window() (the trailing entries follow the usual PETSc enum-table convention) */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL};
37 
38 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40 {
41   PetscFunctionBegin;
42   if (*op == MPIU_SUM) *op = MPI_SUM;
43   else if (*op == MPIU_MAX) *op = MPI_MAX;
44   else if (*op == MPIU_MIN) *op = MPI_MIN;
45   PetscFunctionReturn(0);
46 }
47 
48 /*@C
49    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
50 
51    Not Collective
52 
53    Input Arguments:
54 +  sf - star forest
55 -  unit - data type for each node
56 
57    Output Arguments:
58 +  localtypes - types describing part of local leaf buffer referencing each remote rank
59 -  remotetypes - types describing part of remote root buffer referenced for each remote rank
60 
61    Level: developer
62 
63 .seealso: PetscSFSetGraph(), PetscSFView()
64 @*/
65 static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
66 {
67   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
68   PetscErrorCode    ierr;
69   PetscSFDataLink   link;
70   PetscInt          i,nranks;
71   const PetscInt    *roffset,*rmine,*rremote;
72   const PetscMPIInt *ranks;
73 
74   PetscFunctionBegin;
75   /* Look for types in cache */
76   for (link=w->link; link; link=link->next) {
77     PetscBool match;
78     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
79     if (match) {
80       *localtypes  = link->mine;
81       *remotetypes = link->remote;
82       PetscFunctionReturn(0);
83     }
84   }
85 
86   /* Create new composite types for each send rank */
87   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
88   ierr = PetscNew(&link);CHKERRQ(ierr);
89   ierr = MPI_Type_dup(unit,&link->unit);CHKERRMPI(ierr);
90   ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
91   for (i=0; i<nranks; i++) {
92     PetscInt    rcount = roffset[i+1] - roffset[i];
93     PetscMPIInt *rmine,*rremote;
94 #if !defined(PETSC_USE_64BIT_INDICES)
95     rmine   = sf->rmine + sf->roffset[i];
96     rremote = sf->rremote + sf->roffset[i];
97 #else
98     PetscInt j;
99     ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
100     for (j=0; j<rcount; j++) {
101       ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
102       ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
103     }
104 #endif
105 
106     ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRMPI(ierr);
107     ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRMPI(ierr);
108 #if defined(PETSC_USE_64BIT_INDICES)
109     ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
110 #endif
111     ierr = MPI_Type_commit(&link->mine[i]);CHKERRMPI(ierr);
112     ierr = MPI_Type_commit(&link->remote[i]);CHKERRMPI(ierr);
113   }
114   link->next = w->link;
115   w->link    = link;
116 
117   *localtypes  = link->mine;
118   *remotetypes = link->remote;
119   PetscFunctionReturn(0);
120 }
121 
/*@C
   PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation

   Logically Collective

   Input Arguments:
+  sf - star forest for communication
-  flavor - flavor type

   Options Database Key:
.  -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)

   Level: advanced

   Notes: Window reuse follows these rules:

     PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

     PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
       for i=1 to K
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         ...
         PetscSFOperationBegin(rootdataN,leafdata_whatever);
         PetscSFOperationEnd(rootdataN,leafdata_whatever);
       endfor
       The following pattern will instead raise an error
         PetscSFOperationBegin(rootdata1,leafdata_whatever);
         PetscSFOperationEnd(rootdata1,leafdata_whatever);
         PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
         PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and is not in use

     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE

.seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType()
@*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,flavor,2);
  /* Dispatches to the window implementation when composed; a no-op for other SF types */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
170 
171 static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
172 {
173   PetscSF_Window *w = (PetscSF_Window*)sf->data;
174 
175   PetscFunctionBegin;
176   w->flavor = flavor;
177   PetscFunctionReturn(0);
178 }
179 
/*@C
   PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication

   Logically Collective

   Input Argument:
.  sf - star forest for communication

   Output Argument:
.  flavor - flavor type

   Level: advanced

.seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType()
@*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(flavor,2);
  /* PetscUseMethod errors (rather than silently skipping) if sf is not of window type */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205 
206 static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
207 {
208   PetscSF_Window *w = (PetscSF_Window*)sf->data;
209 
210   PetscFunctionBegin;
211   *flavor = w->flavor;
212   PetscFunctionReturn(0);
213 }
214 
/*@C
   PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication

   Logically Collective

   Input Arguments:
+  sf - star forest for communication
-  sync - synchronization type

   Options Database Key:
.  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)

   Level: advanced

.seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
@*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,sync,2);
  /* Dispatches to the window implementation when composed; a no-op for other SF types */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
242 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
243 {
244   PetscSF_Window *w = (PetscSF_Window*)sf->data;
245 
246   PetscFunctionBegin;
247   w->sync = sync;
248   PetscFunctionReturn(0);
249 }
250 
/*@C
   PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication

   Logically Collective

   Input Argument:
.  sf - star forest for communication

   Output Argument:
.  sync - synchronization type

   Level: advanced

.seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(sync,2);
  /* PetscUseMethod errors (rather than silently skipping) if sf is not of window type */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
276 
277 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
278 {
279   PetscSF_Window *w = (PetscSF_Window*)sf->data;
280 
281   PetscFunctionBegin;
282   *sync = w->sync;
283   PetscFunctionReturn(0);
284 }
285 
/*@C
   PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation

   Logically Collective

   Input Arguments:
+  sf - star forest for communication
-  info - MPI_Info handle

   Level: advanced

   Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.

.seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo()
@*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Dispatches to the window implementation when composed; a no-op for other SF types */
  ierr = PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
310 
311 static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
312 {
313   PetscSF_Window *w = (PetscSF_Window*)sf->data;
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   if (w->info != MPI_INFO_NULL) {
318     ierr = MPI_Info_free(&w->info);CHKERRMPI(ierr);
319   }
320   if (info != MPI_INFO_NULL) {
321     ierr = MPI_Info_dup(info,&w->info);CHKERRMPI(ierr);
322   }
323   PetscFunctionReturn(0);
324 }
325 
/*@C
   PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation

   Logically Collective

   Input Argument:
.  sf - star forest for communication

   Output Argument:
.  info - MPI_Info handle

   Level: advanced

   Notes: if PetscSFWindowSetInfo() has not be called, this returns MPI_INFO_NULL

.seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo()
@*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(info,2);
  /* PetscUseMethod errors (rather than silently skipping) if sf is not of window type */
  ierr = PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
353 
354 static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
355 {
356   PetscSF_Window *w = (PetscSF_Window*)sf->data;
357 
358   PetscFunctionBegin;
359   *info = w->info;
360   PetscFunctionReturn(0);
361 }
362 
/*
   PetscSFGetWindow - Get a window for use with a given data type

   Collective on PetscSF

   Input Arguments:
+  sf - star forest
.  unit - data type
.  array - array to be sent
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
-  startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE

   Output Arguments:
+  target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
.  reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
-  win - window

   Level: developer
.seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Aint       lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint       winaddr;
  PetscInt       nranks;
#endif
  PetscBool      reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool      dummy[2];
  MPI_Aint       wsize;

  PetscFunctionBegin;
  /* Only "simple" unit types are supported: zero lower bound and true extent equal to extent */
  ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRMPI(ierr);
  ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRMPI(ierr);
  if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  /* All flavors except CREATE try to reuse a window from the list */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link=w->wins; reuse && link; link=link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* Debug-only collective check: every process must take the same match/no-match branch,
             otherwise the DYNAMIC reuse-by-pointer scheme is inconsistent across the comm */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
          ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
          if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
        }
        if (link->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use");
        if (epoch && link->epoch) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished");
        winok = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        /* Same collective consistency check for the no-match branch */
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy  ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
        ierr = MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));CHKERRMPI(ierr);
        if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        update = PETSC_TRUE; /* window has its own memory: copy the user array in below */
        link->paddr = array;
        winok = PETSC_TRUE;
      }
      break;
    default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      ierr = PetscInfo3(sf,"Reusing window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
      goto found;
    }
  }

  /* No reusable window: create a new one and prepend it to the list */
  wsize = (MPI_Aint)bytes*sf->nroots;
  ierr = PetscNew(&link);CHKERRQ(ierr);
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    /* One request slot per root rank, used by the LOCK synchronization path */
    ierr = PetscMalloc1(sf->nranks,&link->reqs);CHKERRQ(ierr);
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    /* Expose the user's array directly; no local copy needed */
    ierr = MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRMPI(ierr);
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    ierr = MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);CHKERRMPI(ierr);
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    ierr = MPI_Win_attach(link->win,wsize ? array : &ierr,wsize);CHKERRMPI(ierr);
#else
    ierr = MPI_Win_attach(link->win,array,wsize);CHKERRMPI(ierr);
#endif
    link->addr  = array;
    link->paddr = array;
    if (!w->dynsf) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()");
    /* Dynamic windows need per-target displacements: broadcast each root's attached
       address to its leaves via the helper SF built in PetscSFSetUp_Window() */
    ierr = PetscSFSetUp(w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
    ierr = PetscMalloc1(nranks,&link->dyn_target_addr);CHKERRQ(ierr);
    ierr = MPI_Get_address(array,&winaddr);CHKERRMPI(ierr);
    ierr = PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);CHKERRQ(ierr);
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    /* MPI allocates the window memory; the user array must be copied in (update below) */
    ierr = MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRMPI(ierr);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    ierr = MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);CHKERRMPI(ierr);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
  }
  ierr = PetscInfo3(sf,"New window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
    PetscMPIInt rank;

    /* Copy the user's root data into the MPI-owned window memory */
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);CHKERRMPI(ierr);}
    ierr = PetscMemcpy(link->addr,array,sf->nroots*bytes);CHKERRQ(ierr);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_unlock(rank,*win);CHKERRMPI(ierr);
      ierr = MPI_Win_fence(0,*win);CHKERRMPI(ierr);
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    /* Open the access/exposure epoch according to the synchronization model */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRMPI(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRMPI(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRMPI(ierr);
      if (isize) {ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRMPI(ierr);}
      if (osize) {ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRMPI(ierr);}
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  PetscFunctionReturn(0);
}
551 
552 /*
553    PetscSFFindWindow - Finds a window that is already in use
554 
555    Not Collective
556 
557    Input Arguments:
558 +  sf - star forest
559 .  unit - data type
560 -  array - array with which the window is associated
561 
562    Output Arguments:
563 +  win - window
564 -  reqs - outstanding requests associated to the window
565 
566    Level: developer
567 
568 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
569 */
570 static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
571 {
572   PetscSF_Window *w = (PetscSF_Window*)sf->data;
573   PetscSFWinLink link;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   *win = MPI_WIN_NULL;
578   for (link=w->wins; link; link=link->next) {
579     if (array == link->paddr) {
580       ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
581       *win = link->win;
582       *reqs = link->reqs;
583       PetscFunctionReturn(0);
584     }
585   }
586   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
587 }
588 
/*
   PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  array - array associated with window
.  sync - type of synchronization PetscSFWindowSyncType
.  epoch - close an epoch, must match argument to PetscSFGetWindow()
.  fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
.  update - if we have to update the local window array
-  win - window

   Level: developer

.seealso: PetscSFFindWindow()
*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
{
  PetscSF_Window          *w = (PetscSF_Window*)sf->data;
  PetscErrorCode          ierr;
  PetscSFWinLink          *p,link;
  PetscBool               reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void*                   laddr;
  size_t                  bytes;

  PetscFunctionBegin;
  /* Locate the matching window; p tracks the link pointer so CREATE-flavor entries can be unlinked */
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (array != link->paddr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      laddr = link->addr;
      flavor = link->flavor;
      bytes = link->bytes;
      /* Only CREATE windows are destroyed on restore; other flavors remain in the list for reuse */
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  ierr = PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
  if (epoch) {
    /* Close the access/exposure epoch according to the synchronization model */
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRMPI(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group   ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
      ierr = MPI_Group_size(ingroup,&isize);CHKERRMPI(ierr);
      ierr = MPI_Group_size(outgroup,&osize);CHKERRMPI(ierr);
      if (osize) {ierr = MPI_Win_complete(*win);CHKERRMPI(ierr);}
      if (isize) {ierr = MPI_Win_wait(*win);CHKERRMPI(ierr);}
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  if (update) {
    /* Copy the (possibly remotely modified) window memory back to the user's root array */
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      ierr = MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);CHKERRMPI(ierr);
    }
    ierr = PetscMemcpy(array,laddr,sf->nroots*bytes);CHKERRQ(ierr);
  }
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  if (!reuse) {
    /* CREATE flavor: fully tear down the window that was unlinked above */
    ierr = PetscFree(link->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(link->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&link->win);CHKERRMPI(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
    *win = MPI_WIN_NULL;
  }
  PetscFunctionReturn(0);
}
683 
/* Set up the window SF: compute rank information and build the helper SF used by
   PetscSFGetWindow() to broadcast dynamic-window target addresses */
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Group      ingroup,outgroup;

  PetscFunctionBegin;
  ierr = PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);CHKERRQ(ierr);
  if (!w->dynsf) {
    PetscInt    i;
    PetscSFNode *remotes;

    /* Build a graph with one root per process and one leaf per root rank of sf,
       so a single MPI_Aint per rank can be broadcast (used for dyn_target_addr) */
    ierr = PetscMalloc1(sf->nranks,&remotes);CHKERRQ(ierr);
    for (i=0;i<sf->nranks;i++) {
      remotes[i].rank  = sf->ranks[i];
      remotes[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);CHKERRQ(ierr);
    ierr = PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE);CHKERRQ(ierr); /* break recursion */
    ierr = PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);CHKERRQ(ierr);
  }
  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    /* Fetch the in/out groups up front; the results are unused here — presumably
       PetscSFGetGroups() caches them so epoch open/close is cheap (confirm) */
    ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
    /* fallthrough */
  default:
    break;
  }
  PetscFunctionReturn(0);
}
714 
715 static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
716 {
717   PetscSF_Window          *w = (PetscSF_Window*)sf->data;
718   PetscErrorCode          ierr;
719   PetscSFWindowFlavorType flavor = w->flavor;
720 
721   PetscFunctionBegin;
722   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");CHKERRQ(ierr);
723   ierr = PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);CHKERRQ(ierr);
724   ierr = PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);CHKERRQ(ierr);
725   ierr = PetscSFWindowSetFlavorType(sf,flavor);CHKERRQ(ierr);
726   ierr = PetscOptionsTail();CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
/* Release all cached datatypes, windows, the helper SF, and the info handle */
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window  *w = (PetscSF_Window*)sf->data;
  PetscErrorCode  ierr;
  PetscSFDataLink link,next;
  PetscSFWinLink  wlink,wnext;
  PetscInt        i;

  PetscFunctionBegin;
  /* Free the cached composite datatypes built by PetscSFWindowGetDataTypes() */
  for (link=w->link; link; link=next) {
    next = link->next;
    ierr = MPI_Type_free(&link->unit);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) {
      ierr = MPI_Type_free(&link->mine[i]);CHKERRMPI(ierr);
      ierr = MPI_Type_free(&link->remote[i]);CHKERRMPI(ierr);
    }
    ierr = PetscFree2(link->mine,link->remote);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
  }
  w->link = NULL;
  /* Destroy all windows kept for reuse; an in-use window means an unmatched Get/Restore pair */
  for (wlink=w->wins; wlink; wlink=wnext) {
    wnext = wlink->next;
    if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
    ierr = PetscFree(wlink->dyn_target_addr);CHKERRQ(ierr);
    ierr = PetscFree(wlink->reqs);CHKERRQ(ierr);
    ierr = MPI_Win_free(&wlink->win);CHKERRMPI(ierr);
    ierr = PetscFree(wlink);CHKERRQ(ierr);
  }
  w->wins = NULL;
  ierr = PetscSFDestroy(&w->dynsf);CHKERRQ(ierr);
  if (w->info != MPI_INFO_NULL) {
    ierr = MPI_Info_free(&w->info);CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
765 
766 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
767 {
768   PetscErrorCode ierr;
769 
770   PetscFunctionBegin;
771   ierr = PetscSFReset_Window(sf);CHKERRQ(ierr);
772   ierr = PetscFree(sf->data);CHKERRQ(ierr);
773   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);CHKERRQ(ierr);
774   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);CHKERRQ(ierr);
775   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);CHKERRQ(ierr);
776   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);CHKERRQ(ierr);
777   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);CHKERRQ(ierr);
778   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);CHKERRQ(ierr);
779   PetscFunctionReturn(0);
780 }
781 
782 static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
783 {
784   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
785   PetscErrorCode    ierr;
786   PetscBool         iascii;
787   PetscViewerFormat format;
788 
789   PetscFunctionBegin;
790   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
791   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
792   if (iascii) {
793     ierr = PetscViewerASCIIPrintf(viewer,"  current flavor=%s synchronization=%s MultiSF sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
794     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
795       if (w->info != MPI_INFO_NULL) {
796         PetscMPIInt k,nkeys;
797         char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
798 
799         ierr = MPI_Info_get_nkeys(w->info,&nkeys);CHKERRMPI(ierr);
800         ierr = PetscViewerASCIIPrintf(viewer,"    current info with %d keys. Ordered key-value pairs follow:\n",nkeys);CHKERRQ(ierr);
801         for (k = 0; k < nkeys; k++) {
802           PetscMPIInt flag;
803 
804           ierr = MPI_Info_get_nthkey(w->info,k,key);CHKERRMPI(ierr);
805           ierr = MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);CHKERRMPI(ierr);
806           if (!flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key);
807           ierr = PetscViewerASCIIPrintf(viewer,"      %s = %s\n",key,value);CHKERRQ(ierr);
808         }
809       } else {
810         ierr = PetscViewerASCIIPrintf(viewer,"    current info=MPI_INFO_NULL\n");CHKERRQ(ierr);
811       }
812     }
813   }
814   PetscFunctionReturn(0);
815 }
816 
817 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
818 {
819   PetscSF_Window        *w = (PetscSF_Window*)sf->data;
820   PetscErrorCode        ierr;
821   PetscSFWindowSyncType synctype;
822 
823   PetscFunctionBegin;
824   synctype = w->sync;
825   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
826   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
827   ierr = PetscSFWindowSetSyncType(newsf,synctype);CHKERRQ(ierr);
828   ierr = PetscSFWindowSetFlavorType(newsf,w->flavor);CHKERRQ(ierr);
829   ierr = PetscSFWindowSetInfo(newsf,w->info);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
/* Begin a root-to-leaf broadcast using MPI one-sided Get operations.
   Opens an access epoch on the window exposing rootdata (read-only: MPI_MODE_NOPUT)
   and issues one Get per root rank using the cached composite datatypes.
   Only op == MPI_REPLACE is supported. The matching PetscSFBcastEnd_Window
   closes the epoch. rootmtype/leafmtype are accepted but unused here. */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine,*remote;
  MPI_Request        *reqs;
  MPI_Win            win;

  PetscFunctionBegin;
  if (op != MPI_REPLACE) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  /* target_disp is non-NULL only for the DYNAMIC flavor, where remote base addresses differ per rank */
  ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      /* LOCK sync: per-target shared lock; with MPI_Rget the request is completed
         (and the lock released) in BcastEnd, otherwise unlock happens there too */
      ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRMPI(ierr);
#if defined(PETSC_HAVE_MPI_RGET)
      ierr = MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);CHKERRMPI(ierr);
#else
      ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRMPI(ierr);
#endif
    } else {
      /* FENCE/ACTIVE sync: epoch-wide synchronization was opened by PetscSFGetWindow */
      ierr = MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRMPI(ierr);
    }
  }
  PetscFunctionReturn(0);
}
865 
/* Complete a broadcast started by PetscSFBcastBegin_Window: wait on any MPI_Rget
   requests, release per-target locks (LOCK sync), and close the access epoch,
   returning the window to the cache. */
PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Win        win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  /* Locate the in-use window keyed by (unit,rootdata); errors if no matching Begin */
  ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
  /* reqs is non-NULL only on the MPI_Rget path of BcastBegin */
  if (reqs) {ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
  if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt           i,nranks;
    const PetscMPIInt  *ranks;

    ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
    for (i=0; i<nranks; i++) {
      ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);
    }
  }
  /* Broadcast reads only: no need to update the local root array (last arg PETSC_FALSE) */
  ierr = PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
888 
/* Begin a leaf-to-root reduction using MPI_Accumulate.
   The op is translated from PETSc's composite-safe MPIU_* ops to the built-in
   MPI_* ops, which MPI_Accumulate applies elementwise inside the composite
   datatypes. leafmtype/rootmtype are accepted but unused here. */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Aint     *target_disp;
  const MPI_Datatype *mine,*remote;
  MPI_Win            win;

  PetscFunctionBegin;
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
  /* target_disp is non-NULL only for the DYNAMIC flavor; no request array needed (no Rget here) */
  ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    /* LOCK sync takes a shared lock per target just around the accumulate; MPI_Accumulate
       with a given op is safe under concurrent shared-lock access from multiple origins */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRMPI(ierr);}
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);CHKERRMPI(ierr);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);}
  }
  PetscFunctionReturn(0);
}
913 
/* Complete a reduction started by PetscSFReduceBegin_Window: close the exposure
   epoch and copy the (remotely accumulated) window contents back into rootdata
   (update flag PETSC_TRUE in PetscSFRestoreWindow). */
static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  MPI_Win        win;
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
  /* ReduceBegin passes NULL for reqs, so this is normally a no-op; kept for symmetry/safety */
  if (reqs) {ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
  ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
927 
/* Begin a fetch-and-op: atomically fetch current root values into leafupdate and
   combine leafdata into the roots with op.
   With MPI_Get_accumulate this is a single atomic call per target; without it the
   Get+Accumulate pair must be made atomic by an exclusive lock, so a dedicated
   lockable window (flavor CREATE, LOCK sync) is created for the duration. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine,*remote;
  const MPI_Aint     *target_disp;
  MPI_Win            win;
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf;
#endif

  PetscFunctionBegin;
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  ierr = PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#else
  ierr = PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);CHKERRQ(ierr);
#endif
  for (i=0; i<nranks; i++) {
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* Exclusive lock makes the read-modify-write pair atomic with respect to other origins */
    ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);CHKERRMPI(ierr);
    ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);CHKERRMPI(ierr);
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);CHKERRMPI(ierr);
    ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);
#else
    /* MPI_Get_accumulate is atomic per element, so a shared lock suffices under LOCK sync */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);CHKERRMPI(ierr);}
    ierr = MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win);CHKERRMPI(ierr);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRMPI(ierr);}
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* Restore the user's flavor; the temporary CREATE window is torn down in FetchAndOpEnd */
  w->flavor = oldf;
#endif
  PetscFunctionReturn(0);
}
973 
/* Complete a fetch-and-op started by PetscSFFetchAndOpBegin_Window.
   The restore call must mirror the sync/epoch arguments used by the Begin:
   the user's sync type when MPI_Get_accumulate was used, or the forced
   LOCK-sync non-epoch window otherwise. Root data is updated in both cases. */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  MPI_Win        win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
#endif
  MPI_Request    *reqs = NULL;

  PetscFunctionBegin;
  ierr = PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);CHKERRQ(ierr);
  /* FetchAndOpBegin passes NULL for reqs, so this is normally a no-op; kept for symmetry/safety */
  if (reqs) {ierr = MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  ierr = PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);CHKERRQ(ierr);
#else
  ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
993 
994 PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
995 {
996   PetscSF_Window *w = (PetscSF_Window*)sf->data;
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   sf->ops->SetUp           = PetscSFSetUp_Window;
1001   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
1002   sf->ops->Reset           = PetscSFReset_Window;
1003   sf->ops->Destroy         = PetscSFDestroy_Window;
1004   sf->ops->View            = PetscSFView_Window;
1005   sf->ops->Duplicate       = PetscSFDuplicate_Window;
1006   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
1007   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
1008   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
1009   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
1010   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1011   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
1012 
1013   ierr = PetscNewLog(sf,&w);CHKERRQ(ierr);
1014   sf->data  = (void*)w;
1015   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
1016   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1017   w->info   = MPI_INFO_NULL;
1018 
1019   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);CHKERRQ(ierr);
1020   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);CHKERRQ(ierr);
1021   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);CHKERRQ(ierr);
1022   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);CHKERRQ(ierr);
1023   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);CHKERRQ(ierr);
1024   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);CHKERRQ(ierr);
1025 
1026 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
1027   {
1028     PetscBool ackbug = PETSC_FALSE;
1029     ierr = PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);CHKERRQ(ierr);
1030     if (ackbug) {
1031       ierr = PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");CHKERRQ(ierr);
1032     } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1033   }
1034 #endif
1035   PetscFunctionReturn(0);
1036 }
1037