xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision 5b6bfdb9644f185dbf5e5a09b808ec241507e1e7)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 
3 typedef struct _n_PetscSFDataLink *PetscSFDataLink;
4 typedef struct _n_PetscSFWinLink  *PetscSFWinLink;
5 
6 typedef struct {
7   PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
8   PetscSFDataLink       link;   /* List of MPI data types and windows, lazily constructed for each data type */
9   PetscSFWinLink        wins;   /* List of active windows */
10 } PetscSF_Window;
11 
12 struct _n_PetscSFDataLink {
13   MPI_Datatype    unit;
14   MPI_Datatype    *mine;
15   MPI_Datatype    *remote;
16   PetscSFDataLink next;
17 };
18 
19 struct _n_PetscSFWinLink {
20   PetscBool      inuse;
21   size_t         bytes;
22   void           *addr;
23   MPI_Win        win;
24   PetscBool      epoch;
25   PetscSFWinLink next;
26 };
27 
28 const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",0};
29 
30 /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPIU_Allreduce) */
31 static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
32 {
33 
34   PetscFunctionBegin;
35   if (*op == MPIU_SUM) *op = MPI_SUM;
36   else if (*op == MPIU_MAX) *op = MPI_MAX;
37   else if (*op == MPIU_MIN) *op = MPI_MIN;
38   PetscFunctionReturn(0);
39 }
40 
41 /*@C
42    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
43 
44    Not Collective
45 
46    Input Arguments:
47 +  sf - star forest
48 -  unit - data type for each node
49 
50    Output Arguments:
51 +  localtypes - types describing part of local leaf buffer referencing each remote rank
52 -  remotetypes - types describing part of remote root buffer referenced for each remote rank
53 
54    Level: developer
55 
56 .seealso: PetscSFSetGraph(), PetscSFView()
57 @*/
58 static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
59 {
60   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
61   PetscErrorCode    ierr;
62   PetscSFDataLink   link;
63   PetscInt          i,nranks;
64   const PetscInt    *roffset,*rmine,*rremote;
65   const PetscMPIInt *ranks;
66 
67   PetscFunctionBegin;
68   /* Look for types in cache */
69   for (link=w->link; link; link=link->next) {
70     PetscBool match;
71     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
72     if (match) {
73       *localtypes  = link->mine;
74       *remotetypes = link->remote;
75       PetscFunctionReturn(0);
76     }
77   }
78 
79   /* Create new composite types for each send rank */
80   ierr = PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
81   ierr = PetscNew(&link);CHKERRQ(ierr);
82   ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr);
83   ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
84   for (i=0; i<nranks; i++) {
85     PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
86     PetscMPIInt           *rmine,*rremote;
87 #if !defined(PETSC_USE_64BIT_INDICES)
88     rmine   = sf->rmine + sf->roffset[i];
89     rremote = sf->rremote + sf->roffset[i];
90 #else
91     PetscInt j;
92     ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
93     for (j=0; j<rcount; j++) {
94       ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
95       ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
96     }
97 #endif
98     ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr);
99     ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr);
100 #if defined(PETSC_USE_64BIT_INDICES)
101     ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
102 #endif
103     ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr);
104     ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr);
105   }
106   link->next = w->link;
107   w->link    = link;
108 
109   *localtypes  = link->mine;
110   *remotetypes = link->remote;
111   PetscFunctionReturn(0);
112 }
113 
114 /*@C
115    PetscSFWindowSetSyncType - set synchrozitaion type for PetscSF communication
116 
117    Logically Collective
118 
119    Input Arguments:
120 +  sf - star forest for communication
121 -  sync - synchronization type
122 
123    Options Database Key:
124 .  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
125 
126    Level: advanced
127 
128 .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
129 @*/
130 PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
131 {
132   PetscErrorCode ierr;
133 
134   PetscFunctionBegin;
135   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
136   PetscValidLogicalCollectiveEnum(sf,sync,2);
137   ierr = PetscUseMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
142 {
143   PetscSF_Window *w = (PetscSF_Window*)sf->data;
144 
145   PetscFunctionBegin;
146   w->sync = sync;
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151    PetscSFWindowGetSyncType - get synchrozitaion type for PetscSF communication
152 
153    Logically Collective
154 
155    Input Argument:
156 .  sf - star forest for communication
157 
158    Output Argument:
159 .  sync - synchronization type
160 
161    Level: advanced
162 
163 .seealso: PetscSFGetFromOptions(), PetscSFWindowSetSyncType()
164 @*/
165 PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
166 {
167   PetscErrorCode ierr;
168 
169   PetscFunctionBegin;
170   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
171   PetscValidPointer(sync,2);
172   ierr = PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));CHKERRQ(ierr);
173   PetscFunctionReturn(0);
174 }
175 
176 static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
177 {
178   PetscSF_Window *w = (PetscSF_Window*)sf->data;
179 
180   PetscFunctionBegin;
181   *sync = w->sync;
182   PetscFunctionReturn(0);
183 }
184 
185 /*@C
186    PetscSFGetWindow - Get a window for use with a given data type
187 
188    Collective on PetscSF
189 
190    Input Arguments:
191 +  sf - star forest
192 .  unit - data type
193 .  array - array to be sent
194 .  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
195 .  fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_WINDOW_SYNC_FENCE
196 .  postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_WINDOW_SYNC_ACTIVE
197 -  startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_WINDOW_SYNC_ACTIVE
198 
199    Output Arguments:
200 .  win - window
201 
202    Level: developer
203 
204    Developer Notes:
205    This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
206    reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
207    whichever one is available, by copying the array into it if necessary.
208 
209 .seealso: PetscSFGetRanks(), PetscSFWindowGetDataTypes()
210 @*/
211 static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
212 {
213   PetscSF_Window *w = (PetscSF_Window*)sf->data;
214   PetscErrorCode ierr;
215   MPI_Aint       lb,lb_true,bytes,bytes_true;
216   PetscSFWinLink link;
217 
218   PetscFunctionBegin;
219   ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRQ(ierr);
220   ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRQ(ierr);
221   if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
222   if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
223   ierr = PetscNew(&link);CHKERRQ(ierr);
224 
225   link->bytes = bytes;
226   link->addr  = array;
227 
228   ierr = MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr);
229 
230   link->epoch = epoch;
231   link->next  = w->wins;
232   link->inuse = PETSC_TRUE;
233   w->wins     = link;
234   *win        = link->win;
235 
236   if (epoch) {
237     switch (w->sync) {
238     case PETSCSF_WINDOW_SYNC_FENCE:
239       ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
240       break;
241     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
242       break;
243     case PETSCSF_WINDOW_SYNC_ACTIVE: {
244       MPI_Group ingroup,outgroup;
245       ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
246       ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRQ(ierr);
247       ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRQ(ierr);
248     } break;
249     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 /*@C
256    PetscSFFindWindow - Finds a window that is already in use
257 
258    Not Collective
259 
260    Input Arguments:
261 +  sf - star forest
262 .  unit - data type
263 -  array - array with which the window is associated
264 
265    Output Arguments:
266 .  win - window
267 
268    Level: developer
269 
270 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
271 @*/
272 static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win)
273 {
274   PetscSF_Window *w = (PetscSF_Window*)sf->data;
275   PetscSFWinLink link;
276 
277   PetscFunctionBegin;
278   *win = MPI_WIN_NULL;
279   for (link=w->wins; link; link=link->next) {
280     if (array == link->addr) {
281       *win = link->win;
282       PetscFunctionReturn(0);
283     }
284   }
285   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
286   PetscFunctionReturn(0);
287 }
288 
289 /*@C
290    PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
291 
292    Collective
293 
294    Input Arguments:
295 +  sf - star forest
296 .  unit - data type
297 .  array - array associated with window
298 .  epoch - close an epoch, must match argument to PetscSFGetWindow()
299 -  win - window
300 
301    Level: developer
302 
303 .seealso: PetscSFFindWindow()
304 @*/
305 static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
306 {
307   PetscSF_Window *w = (PetscSF_Window*)sf->data;
308   PetscErrorCode ierr;
309   PetscSFWinLink *p,link;
310 
311   PetscFunctionBegin;
312   for (p=&w->wins; *p; p=&(*p)->next) {
313     link = *p;
314     if (*win == link->win) {
315       if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
316       if (epoch != link->epoch) {
317         if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
318         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
319       }
320       *p = link->next;
321       goto found;
322     }
323   }
324   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
325 
326 found:
327   if (epoch) {
328     switch (w->sync) {
329     case PETSCSF_WINDOW_SYNC_FENCE:
330       ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
331       break;
332     case PETSCSF_WINDOW_SYNC_LOCK:
333       break;                    /* handled outside */
334     case PETSCSF_WINDOW_SYNC_ACTIVE: {
335       ierr = MPI_Win_complete(*win);CHKERRQ(ierr);
336       ierr = MPI_Win_wait(*win);CHKERRQ(ierr);
337     } break;
338     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
339     }
340   }
341 
342   ierr = MPI_Win_free(&link->win);CHKERRQ(ierr);
343   ierr = PetscFree(link);CHKERRQ(ierr);
344   *win = MPI_WIN_NULL;
345   PetscFunctionReturn(0);
346 }
347 
348 static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
349 {
350   PetscSF_Window *w = (PetscSF_Window*)sf->data;
351   PetscErrorCode ierr;
352   MPI_Group      ingroup,outgroup;
353 
354   PetscFunctionBegin;
355   ierr = PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);CHKERRQ(ierr);
356   switch (w->sync) {
357   case PETSCSF_WINDOW_SYNC_ACTIVE:
358     ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
359   default:
360     break;
361   }
362   PetscFunctionReturn(0);
363 }
364 
365 static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
366 {
367   PetscSF_Window *w = (PetscSF_Window*)sf->data;
368   PetscErrorCode ierr;
369 
370   PetscFunctionBegin;
371   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");CHKERRQ(ierr);
372   ierr = PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);CHKERRQ(ierr);
373   ierr = PetscOptionsTail();CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 
377 static PetscErrorCode PetscSFReset_Window(PetscSF sf)
378 {
379   PetscSF_Window  *w = (PetscSF_Window*)sf->data;
380   PetscErrorCode  ierr;
381   PetscSFDataLink link,next;
382   PetscSFWinLink  wlink,wnext;
383   PetscInt        i;
384 
385   PetscFunctionBegin;
386   for (link=w->link; link; link=next) {
387     next = link->next;
388     ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);
389     for (i=0; i<sf->nranks; i++) {
390       ierr = MPI_Type_free(&link->mine[i]);CHKERRQ(ierr);
391       ierr = MPI_Type_free(&link->remote[i]);CHKERRQ(ierr);
392     }
393     ierr = PetscFree2(link->mine,link->remote);CHKERRQ(ierr);
394     ierr = PetscFree(link);CHKERRQ(ierr);
395   }
396   w->link = NULL;
397   for (wlink=w->wins; wlink; wlink=wnext) {
398     wnext = wlink->next;
399     if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
400     ierr = MPI_Win_free(&wlink->win);CHKERRQ(ierr);
401     ierr = PetscFree(wlink);CHKERRQ(ierr);
402   }
403   w->wins = NULL;
404   PetscFunctionReturn(0);
405 }
406 
407 static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
408 {
409   PetscErrorCode ierr;
410 
411   PetscFunctionBegin;
412   ierr = PetscSFReset_Window(sf);CHKERRQ(ierr);
413   ierr = PetscFree(sf->data);CHKERRQ(ierr);
414   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);CHKERRQ(ierr);
415   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
420 {
421   PetscSF_Window *w = (PetscSF_Window*)sf->data;
422   PetscErrorCode ierr;
423   PetscBool      iascii;
424 
425   PetscFunctionBegin;
426   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
427   if (iascii) {
428     ierr = PetscViewerASCIIPrintf(viewer,"  synchronization=%s sort=%s\n",PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
429   }
430   PetscFunctionReturn(0);
431 }
432 
433 static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
434 {
435   PetscSF_Window        *w = (PetscSF_Window*)sf->data;
436   PetscErrorCode        ierr;
437   PetscSFWindowSyncType synctype;
438 
439   PetscFunctionBegin;
440   synctype = w->sync;
441   if (!sf->setupcalled) {
442     /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
443     synctype = PETSCSF_WINDOW_SYNC_LOCK;
444   }
445   ierr = PetscSFWindowSetSyncType(newsf,synctype);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
450 {
451   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
452   PetscErrorCode     ierr;
453   PetscInt           i,nranks;
454   const PetscMPIInt  *ranks;
455   const MPI_Datatype *mine,*remote;
456   MPI_Win            win;
457 
458   PetscFunctionBegin;
459   ierr = PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
460   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
461   ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);CHKERRQ(ierr);
462   for (i=0; i<nranks; i++) {
463     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);}
464     ierr = MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);CHKERRQ(ierr);
465     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);}
466   }
467   PetscFunctionReturn(0);
468 }
469 
470 PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
471 {
472   PetscErrorCode ierr;
473   MPI_Win        win;
474 
475   PetscFunctionBegin;
476   ierr = PetscSFFindWindow(sf,unit,rootdata,&win);CHKERRQ(ierr);
477   ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
482 {
483   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
484   PetscErrorCode     ierr;
485   PetscInt           i,nranks;
486   const PetscMPIInt  *ranks;
487   const MPI_Datatype *mine,*remote;
488   MPI_Win            win;
489 
490   PetscFunctionBegin;
491   ierr = PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
492   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
493   ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
494   ierr = PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&win);CHKERRQ(ierr);
495   for (i=0; i<nranks; i++) {
496     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);}
497     ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);CHKERRQ(ierr);
498     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);}
499   }
500   PetscFunctionReturn(0);
501 }
502 
503 static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
504 {
505   PetscSF_Window *w = (PetscSF_Window*)sf->data;
506   PetscErrorCode ierr;
507   MPI_Win        win;
508 
509   PetscFunctionBegin;
510   if (!w->wins) PetscFunctionReturn(0);
511   ierr = PetscSFFindWindow(sf,unit,rootdata,&win);CHKERRQ(ierr);
512   ierr = MPI_Win_fence(MPI_MODE_NOSUCCEED,win);CHKERRQ(ierr);
513   ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSUCCEED,&win);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
517 {
518   PetscErrorCode     ierr;
519   PetscInt           i,nranks;
520   const PetscMPIInt  *ranks;
521   const MPI_Datatype *mine,*remote;
522   MPI_Win            win;
523 
524   PetscFunctionBegin;
525   ierr = PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
526   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
527   ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
528   ierr = PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);CHKERRQ(ierr);
529   for (i=0; i<sf->nranks; i++) {
530     ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,sf->ranks[i],0,win);CHKERRQ(ierr);
531     ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);CHKERRQ(ierr);
532     ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);CHKERRQ(ierr);
533     ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);
534   }
535   PetscFunctionReturn(0);
536 }
537 
538 static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
539 {
540   PetscErrorCode ierr;
541   MPI_Win        win;
542 
543   PetscFunctionBegin;
544   ierr = PetscSFFindWindow(sf,unit,rootdata,&win);CHKERRQ(ierr);
545   /* Nothing to do currently because MPI_LOCK_EXCLUSIVE is used in PetscSFFetchAndOpBegin(), rendering this implementation synchronous. */
546   ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSC_FALSE,0,&win);CHKERRQ(ierr);
547   PetscFunctionReturn(0);
548 }
549 
550 PETSC_EXTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
551 {
552   PetscSF_Window *w = (PetscSF_Window*)sf->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   sf->ops->SetUp           = PetscSFSetUp_Window;
557   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
558   sf->ops->Reset           = PetscSFReset_Window;
559   sf->ops->Destroy         = PetscSFDestroy_Window;
560   sf->ops->View            = PetscSFView_Window;
561   sf->ops->Duplicate       = PetscSFDuplicate_Window;
562   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
563   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
564   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
565   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
566   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
567   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
568 
569   ierr = PetscNewLog(sf,&w);CHKERRQ(ierr);
570   sf->data = (void*)w;
571   w->sync  = PETSCSF_WINDOW_SYNC_FENCE;
572 
573   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);CHKERRQ(ierr);
575 
576 #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
577   {
578     PetscBool ackbug = PETSC_FALSE;
579     ierr = PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);CHKERRQ(ierr);
580     if (ackbug) {
581       ierr = PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");CHKERRQ(ierr);
582     } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
583   }
584 #endif
585   PetscFunctionReturn(0);
586 }
587