xref: /petsc/src/vec/is/sf/impls/window/sfwindow.c (revision dec1416f15364d8a66cef6f4b2a5a2aba5192d13)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
295fce210SBarry Smith 
395fce210SBarry Smith typedef struct _n_PetscSFDataLink *PetscSFDataLink;
495fce210SBarry Smith typedef struct _n_PetscSFWinLink  *PetscSFWinLink;
595fce210SBarry Smith 
695fce210SBarry Smith typedef struct {
795fce210SBarry Smith   PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
895fce210SBarry Smith   PetscSFDataLink       link;   /* List of MPI data types and windows, lazily constructed for each data type */
995fce210SBarry Smith   PetscSFWinLink        wins;   /* List of active windows */
1095fce210SBarry Smith } PetscSF_Window;
1195fce210SBarry Smith 
1295fce210SBarry Smith struct _n_PetscSFDataLink {
1395fce210SBarry Smith   MPI_Datatype    unit;
1495fce210SBarry Smith   MPI_Datatype    *mine;
1595fce210SBarry Smith   MPI_Datatype    *remote;
1695fce210SBarry Smith   PetscSFDataLink next;
1795fce210SBarry Smith };
1895fce210SBarry Smith 
1995fce210SBarry Smith struct _n_PetscSFWinLink {
2095fce210SBarry Smith   PetscBool      inuse;
2195fce210SBarry Smith   size_t         bytes;
2295fce210SBarry Smith   void           *addr;
2395fce210SBarry Smith   MPI_Win        win;
2495fce210SBarry Smith   PetscBool      epoch;
2595fce210SBarry Smith   PetscSFWinLink next;
2695fce210SBarry Smith };
2795fce210SBarry Smith 
2895fce210SBarry Smith const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",0};
2995fce210SBarry Smith 
30b2566f29SBarry Smith /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPIU_Allreduce) */
3195fce210SBarry Smith static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
3295fce210SBarry Smith {
3395fce210SBarry Smith 
3495fce210SBarry Smith   PetscFunctionBegin;
3595fce210SBarry Smith   if (*op == MPIU_SUM) *op = MPI_SUM;
3695fce210SBarry Smith   else if (*op == MPIU_MAX) *op = MPI_MAX;
3795fce210SBarry Smith   else if (*op == MPIU_MIN) *op = MPI_MIN;
3895fce210SBarry Smith   PetscFunctionReturn(0);
3995fce210SBarry Smith }
4095fce210SBarry Smith 
4195fce210SBarry Smith /*@C
4295fce210SBarry Smith    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
4395fce210SBarry Smith 
4495fce210SBarry Smith    Not Collective
4595fce210SBarry Smith 
4695fce210SBarry Smith    Input Arguments:
4795fce210SBarry Smith +  sf - star forest
4895fce210SBarry Smith -  unit - data type for each node
4995fce210SBarry Smith 
5095fce210SBarry Smith    Output Arguments:
5195fce210SBarry Smith +  localtypes - types describing part of local leaf buffer referencing each remote rank
5295fce210SBarry Smith -  remotetypes - types describing part of remote root buffer referenced for each remote rank
5395fce210SBarry Smith 
5495fce210SBarry Smith    Level: developer
5595fce210SBarry Smith 
5695fce210SBarry Smith .seealso: PetscSFSetGraph(), PetscSFView()
5795fce210SBarry Smith @*/
5895fce210SBarry Smith static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
5995fce210SBarry Smith {
6095fce210SBarry Smith   PetscSF_Window    *w = (PetscSF_Window*)sf->data;
6195fce210SBarry Smith   PetscErrorCode    ierr;
6295fce210SBarry Smith   PetscSFDataLink   link;
6395fce210SBarry Smith   PetscInt          i,nranks;
6495fce210SBarry Smith   const PetscInt    *roffset,*rmine,*rremote;
6595fce210SBarry Smith   const PetscMPIInt *ranks;
6695fce210SBarry Smith 
6795fce210SBarry Smith   PetscFunctionBegin;
6895fce210SBarry Smith   /* Look for types in cache */
6995fce210SBarry Smith   for (link=w->link; link; link=link->next) {
7095fce210SBarry Smith     PetscBool match;
7195fce210SBarry Smith     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
7295fce210SBarry Smith     if (match) {
7395fce210SBarry Smith       *localtypes  = link->mine;
7495fce210SBarry Smith       *remotetypes = link->remote;
7595fce210SBarry Smith       PetscFunctionReturn(0);
7695fce210SBarry Smith     }
7795fce210SBarry Smith   }
7895fce210SBarry Smith 
7995fce210SBarry Smith   /* Create new composite types for each send rank */
80*dec1416fSJunchao Zhang   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
8195dccacaSBarry Smith   ierr = PetscNew(&link);CHKERRQ(ierr);
8295fce210SBarry Smith   ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr);
83dcca6d9dSJed Brown   ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
8495fce210SBarry Smith   for (i=0; i<nranks; i++) {
8595fce210SBarry Smith     PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
8695fce210SBarry Smith     PetscMPIInt           *rmine,*rremote;
8795fce210SBarry Smith #if !defined(PETSC_USE_64BIT_INDICES)
8895fce210SBarry Smith     rmine   = sf->rmine + sf->roffset[i];
8995fce210SBarry Smith     rremote = sf->rremote + sf->roffset[i];
9095fce210SBarry Smith #else
9195fce210SBarry Smith     PetscInt j;
92dcca6d9dSJed Brown     ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
9395fce210SBarry Smith     for (j=0; j<rcount; j++) {
9495fce210SBarry Smith       ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
9595fce210SBarry Smith       ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
9695fce210SBarry Smith     }
9795fce210SBarry Smith #endif
9895fce210SBarry Smith     ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr);
9995fce210SBarry Smith     ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr);
10095fce210SBarry Smith #if defined(PETSC_USE_64BIT_INDICES)
10195fce210SBarry Smith     ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
10295fce210SBarry Smith #endif
10395fce210SBarry Smith     ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr);
10495fce210SBarry Smith     ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr);
10595fce210SBarry Smith   }
10695fce210SBarry Smith   link->next = w->link;
10795fce210SBarry Smith   w->link    = link;
10895fce210SBarry Smith 
10995fce210SBarry Smith   *localtypes  = link->mine;
11095fce210SBarry Smith   *remotetypes = link->remote;
11195fce210SBarry Smith   PetscFunctionReturn(0);
11295fce210SBarry Smith }
11395fce210SBarry Smith 
11495fce210SBarry Smith /*@C
11595fce210SBarry Smith    PetscSFWindowSetSyncType - set synchrozitaion type for PetscSF communication
11695fce210SBarry Smith 
11795fce210SBarry Smith    Logically Collective
11895fce210SBarry Smith 
11995fce210SBarry Smith    Input Arguments:
12095fce210SBarry Smith +  sf - star forest for communication
12195fce210SBarry Smith -  sync - synchronization type
12295fce210SBarry Smith 
12395fce210SBarry Smith    Options Database Key:
12460263706SJed Brown .  -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
12595fce210SBarry Smith 
12695fce210SBarry Smith    Level: advanced
12795fce210SBarry Smith 
12895fce210SBarry Smith .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
12995fce210SBarry Smith @*/
13095fce210SBarry Smith PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
13195fce210SBarry Smith {
13295fce210SBarry Smith   PetscErrorCode ierr;
13395fce210SBarry Smith 
13495fce210SBarry Smith   PetscFunctionBegin;
13595fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
13695fce210SBarry Smith   PetscValidLogicalCollectiveEnum(sf,sync,2);
13795fce210SBarry Smith   ierr = PetscUseMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));CHKERRQ(ierr);
13895fce210SBarry Smith   PetscFunctionReturn(0);
13995fce210SBarry Smith }
14095fce210SBarry Smith 
141f7a08781SBarry Smith static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
14295fce210SBarry Smith {
14395fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
14495fce210SBarry Smith 
14595fce210SBarry Smith   PetscFunctionBegin;
14695fce210SBarry Smith   w->sync = sync;
14795fce210SBarry Smith   PetscFunctionReturn(0);
14895fce210SBarry Smith }
14995fce210SBarry Smith 
15095fce210SBarry Smith /*@C
15195fce210SBarry Smith    PetscSFWindowGetSyncType - get synchrozitaion type for PetscSF communication
15295fce210SBarry Smith 
15395fce210SBarry Smith    Logically Collective
15495fce210SBarry Smith 
15595fce210SBarry Smith    Input Argument:
15695fce210SBarry Smith .  sf - star forest for communication
15795fce210SBarry Smith 
15895fce210SBarry Smith    Output Argument:
15995fce210SBarry Smith .  sync - synchronization type
16095fce210SBarry Smith 
16195fce210SBarry Smith    Level: advanced
16295fce210SBarry Smith 
16395fce210SBarry Smith .seealso: PetscSFGetFromOptions(), PetscSFWindowSetSyncType()
16495fce210SBarry Smith @*/
16595fce210SBarry Smith PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
16695fce210SBarry Smith {
16795fce210SBarry Smith   PetscErrorCode ierr;
16895fce210SBarry Smith 
16995fce210SBarry Smith   PetscFunctionBegin;
17095fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
17195fce210SBarry Smith   PetscValidPointer(sync,2);
172163d334eSBarry Smith   ierr = PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));CHKERRQ(ierr);
17395fce210SBarry Smith   PetscFunctionReturn(0);
17495fce210SBarry Smith }
17595fce210SBarry Smith 
176f7a08781SBarry Smith static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
17795fce210SBarry Smith {
17895fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
17995fce210SBarry Smith 
18095fce210SBarry Smith   PetscFunctionBegin;
18195fce210SBarry Smith   *sync = w->sync;
18295fce210SBarry Smith   PetscFunctionReturn(0);
18395fce210SBarry Smith }
18495fce210SBarry Smith 
18595fce210SBarry Smith /*@C
18695fce210SBarry Smith    PetscSFGetWindow - Get a window for use with a given data type
18795fce210SBarry Smith 
18895fce210SBarry Smith    Collective on PetscSF
18995fce210SBarry Smith 
19095fce210SBarry Smith    Input Arguments:
19195fce210SBarry Smith +  sf - star forest
19295fce210SBarry Smith .  unit - data type
19395fce210SBarry Smith .  array - array to be sent
19495fce210SBarry Smith .  epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
19595fce210SBarry Smith .  fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_WINDOW_SYNC_FENCE
19695fce210SBarry Smith .  postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_WINDOW_SYNC_ACTIVE
19795fce210SBarry Smith -  startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_WINDOW_SYNC_ACTIVE
19895fce210SBarry Smith 
19995fce210SBarry Smith    Output Arguments:
20095fce210SBarry Smith .  win - window
20195fce210SBarry Smith 
20295fce210SBarry Smith    Level: developer
20395fce210SBarry Smith 
20495fce210SBarry Smith    Developer Notes:
20595fce210SBarry Smith    This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
20695fce210SBarry Smith    reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
20795fce210SBarry Smith    whichever one is available, by copying the array into it if necessary.
20895fce210SBarry Smith 
209*dec1416fSJunchao Zhang .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
21095fce210SBarry Smith @*/
21195fce210SBarry Smith static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
21295fce210SBarry Smith {
21395fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
21495fce210SBarry Smith   PetscErrorCode ierr;
21595fce210SBarry Smith   MPI_Aint       lb,lb_true,bytes,bytes_true;
21695fce210SBarry Smith   PetscSFWinLink link;
21795fce210SBarry Smith 
21895fce210SBarry Smith   PetscFunctionBegin;
21995fce210SBarry Smith   ierr = MPI_Type_get_extent(unit,&lb,&bytes);CHKERRQ(ierr);
22095fce210SBarry Smith   ierr = MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);CHKERRQ(ierr);
22195fce210SBarry Smith   if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
22295fce210SBarry Smith   if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
22395dccacaSBarry Smith   ierr = PetscNew(&link);CHKERRQ(ierr);
22495fce210SBarry Smith 
22595fce210SBarry Smith   link->bytes = bytes;
22695fce210SBarry Smith   link->addr  = array;
22795fce210SBarry Smith 
22895fce210SBarry Smith   ierr = MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,PetscObjectComm((PetscObject)sf),&link->win);CHKERRQ(ierr);
22995fce210SBarry Smith 
23095fce210SBarry Smith   link->epoch = epoch;
23195fce210SBarry Smith   link->next  = w->wins;
23295fce210SBarry Smith   link->inuse = PETSC_TRUE;
23395fce210SBarry Smith   w->wins     = link;
23495fce210SBarry Smith   *win        = link->win;
23595fce210SBarry Smith 
23695fce210SBarry Smith   if (epoch) {
23795fce210SBarry Smith     switch (w->sync) {
23895fce210SBarry Smith     case PETSCSF_WINDOW_SYNC_FENCE:
23995fce210SBarry Smith       ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
24095fce210SBarry Smith       break;
24195fce210SBarry Smith     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
24295fce210SBarry Smith       break;
24395fce210SBarry Smith     case PETSCSF_WINDOW_SYNC_ACTIVE: {
24495fce210SBarry Smith       MPI_Group ingroup,outgroup;
24595fce210SBarry Smith       ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
24695fce210SBarry Smith       ierr = MPI_Win_post(ingroup,postassert,*win);CHKERRQ(ierr);
24795fce210SBarry Smith       ierr = MPI_Win_start(outgroup,startassert,*win);CHKERRQ(ierr);
24895fce210SBarry Smith     } break;
24995fce210SBarry Smith     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
25095fce210SBarry Smith     }
25195fce210SBarry Smith   }
25295fce210SBarry Smith   PetscFunctionReturn(0);
25395fce210SBarry Smith }
25495fce210SBarry Smith 
25595fce210SBarry Smith /*@C
25695fce210SBarry Smith    PetscSFFindWindow - Finds a window that is already in use
25795fce210SBarry Smith 
25895fce210SBarry Smith    Not Collective
25995fce210SBarry Smith 
26095fce210SBarry Smith    Input Arguments:
26195fce210SBarry Smith +  sf - star forest
26295fce210SBarry Smith .  unit - data type
26395fce210SBarry Smith -  array - array with which the window is associated
26495fce210SBarry Smith 
26595fce210SBarry Smith    Output Arguments:
26695fce210SBarry Smith .  win - window
26795fce210SBarry Smith 
26895fce210SBarry Smith    Level: developer
26995fce210SBarry Smith 
27095fce210SBarry Smith .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
27195fce210SBarry Smith @*/
27295fce210SBarry Smith static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win)
27395fce210SBarry Smith {
27495fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
27595fce210SBarry Smith   PetscSFWinLink link;
27695fce210SBarry Smith 
27795fce210SBarry Smith   PetscFunctionBegin;
278c0cd0301SJed Brown   *win = MPI_WIN_NULL;
27995fce210SBarry Smith   for (link=w->wins; link; link=link->next) {
28095fce210SBarry Smith     if (array == link->addr) {
28195fce210SBarry Smith       *win = link->win;
28295fce210SBarry Smith       PetscFunctionReturn(0);
28395fce210SBarry Smith     }
28495fce210SBarry Smith   }
28595fce210SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
28695fce210SBarry Smith   PetscFunctionReturn(0);
28795fce210SBarry Smith }
28895fce210SBarry Smith 
28995fce210SBarry Smith /*@C
29095fce210SBarry Smith    PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
29195fce210SBarry Smith 
29295fce210SBarry Smith    Collective
29395fce210SBarry Smith 
29495fce210SBarry Smith    Input Arguments:
29595fce210SBarry Smith +  sf - star forest
29695fce210SBarry Smith .  unit - data type
29795fce210SBarry Smith .  array - array associated with window
29895fce210SBarry Smith .  epoch - close an epoch, must match argument to PetscSFGetWindow()
29995fce210SBarry Smith -  win - window
30095fce210SBarry Smith 
30195fce210SBarry Smith    Level: developer
30295fce210SBarry Smith 
30395fce210SBarry Smith .seealso: PetscSFFindWindow()
30495fce210SBarry Smith @*/
30595fce210SBarry Smith static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
30695fce210SBarry Smith {
30795fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
30895fce210SBarry Smith   PetscErrorCode ierr;
30995fce210SBarry Smith   PetscSFWinLink *p,link;
31095fce210SBarry Smith 
31195fce210SBarry Smith   PetscFunctionBegin;
31295fce210SBarry Smith   for (p=&w->wins; *p; p=&(*p)->next) {
31395fce210SBarry Smith     link = *p;
31495fce210SBarry Smith     if (*win == link->win) {
31595fce210SBarry Smith       if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
31695fce210SBarry Smith       if (epoch != link->epoch) {
31795fce210SBarry Smith         if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
31895fce210SBarry Smith         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
31995fce210SBarry Smith       }
32095fce210SBarry Smith       *p = link->next;
32195fce210SBarry Smith       goto found;
32295fce210SBarry Smith     }
32395fce210SBarry Smith   }
32495fce210SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
32595fce210SBarry Smith 
32695fce210SBarry Smith found:
32795fce210SBarry Smith   if (epoch) {
32895fce210SBarry Smith     switch (w->sync) {
32995fce210SBarry Smith     case PETSCSF_WINDOW_SYNC_FENCE:
33095fce210SBarry Smith       ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
33195fce210SBarry Smith       break;
33295fce210SBarry Smith     case PETSCSF_WINDOW_SYNC_LOCK:
33395fce210SBarry Smith       break;                    /* handled outside */
33495fce210SBarry Smith     case PETSCSF_WINDOW_SYNC_ACTIVE: {
33595fce210SBarry Smith       ierr = MPI_Win_complete(*win);CHKERRQ(ierr);
33695fce210SBarry Smith       ierr = MPI_Win_wait(*win);CHKERRQ(ierr);
33795fce210SBarry Smith     } break;
33895fce210SBarry Smith     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
33995fce210SBarry Smith     }
34095fce210SBarry Smith   }
34195fce210SBarry Smith 
34295fce210SBarry Smith   ierr = MPI_Win_free(&link->win);CHKERRQ(ierr);
34395fce210SBarry Smith   ierr = PetscFree(link);CHKERRQ(ierr);
34495fce210SBarry Smith   *win = MPI_WIN_NULL;
34595fce210SBarry Smith   PetscFunctionReturn(0);
34695fce210SBarry Smith }
34795fce210SBarry Smith 
34895fce210SBarry Smith static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
34995fce210SBarry Smith {
35095fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
35195fce210SBarry Smith   PetscErrorCode ierr;
35295fce210SBarry Smith   MPI_Group      ingroup,outgroup;
35395fce210SBarry Smith 
35495fce210SBarry Smith   PetscFunctionBegin;
355b5a8e515SJed Brown   ierr = PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);CHKERRQ(ierr);
35695fce210SBarry Smith   switch (w->sync) {
35795fce210SBarry Smith   case PETSCSF_WINDOW_SYNC_ACTIVE:
35895fce210SBarry Smith     ierr = PetscSFGetGroups(sf,&ingroup,&outgroup);CHKERRQ(ierr);
35995fce210SBarry Smith   default:
36095fce210SBarry Smith     break;
36195fce210SBarry Smith   }
36295fce210SBarry Smith   PetscFunctionReturn(0);
36395fce210SBarry Smith }
36495fce210SBarry Smith 
3654416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
36695fce210SBarry Smith {
36795fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
36895fce210SBarry Smith   PetscErrorCode ierr;
36995fce210SBarry Smith 
37095fce210SBarry Smith   PetscFunctionBegin;
371e55864a3SBarry Smith   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");CHKERRQ(ierr);
37295fce210SBarry Smith   ierr = PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);CHKERRQ(ierr);
37395fce210SBarry Smith   ierr = PetscOptionsTail();CHKERRQ(ierr);
37495fce210SBarry Smith   PetscFunctionReturn(0);
37595fce210SBarry Smith }
37695fce210SBarry Smith 
37795fce210SBarry Smith static PetscErrorCode PetscSFReset_Window(PetscSF sf)
37895fce210SBarry Smith {
37995fce210SBarry Smith   PetscSF_Window  *w = (PetscSF_Window*)sf->data;
38095fce210SBarry Smith   PetscErrorCode  ierr;
38195fce210SBarry Smith   PetscSFDataLink link,next;
38295fce210SBarry Smith   PetscSFWinLink  wlink,wnext;
38395fce210SBarry Smith   PetscInt        i;
38495fce210SBarry Smith 
38595fce210SBarry Smith   PetscFunctionBegin;
38695fce210SBarry Smith   for (link=w->link; link; link=next) {
38795fce210SBarry Smith     next = link->next;
38895fce210SBarry Smith     ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);
38995fce210SBarry Smith     for (i=0; i<sf->nranks; i++) {
39095fce210SBarry Smith       ierr = MPI_Type_free(&link->mine[i]);CHKERRQ(ierr);
39195fce210SBarry Smith       ierr = MPI_Type_free(&link->remote[i]);CHKERRQ(ierr);
39295fce210SBarry Smith     }
39395fce210SBarry Smith     ierr = PetscFree2(link->mine,link->remote);CHKERRQ(ierr);
39495fce210SBarry Smith     ierr = PetscFree(link);CHKERRQ(ierr);
39595fce210SBarry Smith   }
39695fce210SBarry Smith   w->link = NULL;
39795fce210SBarry Smith   for (wlink=w->wins; wlink; wlink=wnext) {
39895fce210SBarry Smith     wnext = wlink->next;
39995fce210SBarry Smith     if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
40095fce210SBarry Smith     ierr = MPI_Win_free(&wlink->win);CHKERRQ(ierr);
40195fce210SBarry Smith     ierr = PetscFree(wlink);CHKERRQ(ierr);
40295fce210SBarry Smith   }
40395fce210SBarry Smith   w->wins = NULL;
40495fce210SBarry Smith   PetscFunctionReturn(0);
40595fce210SBarry Smith }
40695fce210SBarry Smith 
40795fce210SBarry Smith static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
40895fce210SBarry Smith {
40995fce210SBarry Smith   PetscErrorCode ierr;
41095fce210SBarry Smith 
41195fce210SBarry Smith   PetscFunctionBegin;
41229046d53SLisandro Dalcin   ierr = PetscSFReset_Window(sf);CHKERRQ(ierr);
41395fce210SBarry Smith   ierr = PetscFree(sf->data);CHKERRQ(ierr);
414bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);CHKERRQ(ierr);
415bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);CHKERRQ(ierr);
41695fce210SBarry Smith   PetscFunctionReturn(0);
41795fce210SBarry Smith }
41895fce210SBarry Smith 
41995fce210SBarry Smith static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
42095fce210SBarry Smith {
42195fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
42295fce210SBarry Smith   PetscErrorCode ierr;
42395fce210SBarry Smith   PetscBool      iascii;
42495fce210SBarry Smith 
42595fce210SBarry Smith   PetscFunctionBegin;
42695fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
42795fce210SBarry Smith   if (iascii) {
42895fce210SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  synchronization=%s sort=%s\n",PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
42995fce210SBarry Smith   }
43095fce210SBarry Smith   PetscFunctionReturn(0);
43195fce210SBarry Smith }
43295fce210SBarry Smith 
43395fce210SBarry Smith static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
43495fce210SBarry Smith {
43595fce210SBarry Smith   PetscSF_Window        *w = (PetscSF_Window*)sf->data;
43695fce210SBarry Smith   PetscErrorCode        ierr;
43795fce210SBarry Smith   PetscSFWindowSyncType synctype;
43895fce210SBarry Smith 
43995fce210SBarry Smith   PetscFunctionBegin;
44095fce210SBarry Smith   synctype = w->sync;
44195fce210SBarry Smith   if (!sf->setupcalled) {
44295fce210SBarry Smith     /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
44395fce210SBarry Smith     synctype = PETSCSF_WINDOW_SYNC_LOCK;
44495fce210SBarry Smith   }
44595fce210SBarry Smith   ierr = PetscSFWindowSetSyncType(newsf,synctype);CHKERRQ(ierr);
44695fce210SBarry Smith   PetscFunctionReturn(0);
44795fce210SBarry Smith }
44895fce210SBarry Smith 
44995fce210SBarry Smith static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
45095fce210SBarry Smith {
45195fce210SBarry Smith   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
45295fce210SBarry Smith   PetscErrorCode     ierr;
45395fce210SBarry Smith   PetscInt           i,nranks;
45495fce210SBarry Smith   const PetscMPIInt  *ranks;
45595fce210SBarry Smith   const MPI_Datatype *mine,*remote;
45695fce210SBarry Smith   MPI_Win            win;
45795fce210SBarry Smith 
45895fce210SBarry Smith   PetscFunctionBegin;
459*dec1416fSJunchao Zhang   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
46095fce210SBarry Smith   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
46195fce210SBarry Smith   ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);CHKERRQ(ierr);
46295fce210SBarry Smith   for (i=0; i<nranks; i++) {
46395fce210SBarry Smith     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);}
46495fce210SBarry Smith     ierr = MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);CHKERRQ(ierr);
46595fce210SBarry Smith     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);}
46695fce210SBarry Smith   }
46795fce210SBarry Smith   PetscFunctionReturn(0);
46895fce210SBarry Smith }
46995fce210SBarry Smith 
47095fce210SBarry Smith PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
47195fce210SBarry Smith {
47295fce210SBarry Smith   PetscErrorCode ierr;
47395fce210SBarry Smith   MPI_Win        win;
47495fce210SBarry Smith 
47595fce210SBarry Smith   PetscFunctionBegin;
47695fce210SBarry Smith   ierr = PetscSFFindWindow(sf,unit,rootdata,&win);CHKERRQ(ierr);
47795fce210SBarry Smith   ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);CHKERRQ(ierr);
47895fce210SBarry Smith   PetscFunctionReturn(0);
47995fce210SBarry Smith }
48095fce210SBarry Smith 
481de49d1a2SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
482de49d1a2SJunchao Zhang {
483de49d1a2SJunchao Zhang   PetscErrorCode ierr;
484de49d1a2SJunchao Zhang 
485de49d1a2SJunchao Zhang   PetscFunctionBegin;
486de49d1a2SJunchao Zhang   if (op == MPI_REPLACE) { ierr = PetscSFBcastBegin_Window(sf,unit,rootdata,leafdata);CHKERRQ(ierr); }
487de49d1a2SJunchao Zhang   else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastAndOpBegin_Window with reduction op has not been implemented");
488de49d1a2SJunchao Zhang   PetscFunctionReturn(0);
489de49d1a2SJunchao Zhang }
490de49d1a2SJunchao Zhang 
491de49d1a2SJunchao Zhang PetscErrorCode PetscSFBcastAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
492de49d1a2SJunchao Zhang {
493de49d1a2SJunchao Zhang   PetscErrorCode ierr;
494de49d1a2SJunchao Zhang 
495de49d1a2SJunchao Zhang   PetscFunctionBegin;
496de49d1a2SJunchao Zhang   if (op == MPI_REPLACE) { ierr = PetscSFBcastEnd_Window(sf,unit,rootdata,leafdata);CHKERRQ(ierr); }
497de49d1a2SJunchao Zhang   else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastAndOpEnd_Window with reduction op has not been implemented");
498de49d1a2SJunchao Zhang   PetscFunctionReturn(0);
499de49d1a2SJunchao Zhang }
500de49d1a2SJunchao Zhang 
50195fce210SBarry Smith PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
50295fce210SBarry Smith {
50395fce210SBarry Smith   PetscSF_Window     *w = (PetscSF_Window*)sf->data;
50495fce210SBarry Smith   PetscErrorCode     ierr;
50595fce210SBarry Smith   PetscInt           i,nranks;
50695fce210SBarry Smith   const PetscMPIInt  *ranks;
50795fce210SBarry Smith   const MPI_Datatype *mine,*remote;
50895fce210SBarry Smith   MPI_Win            win;
50995fce210SBarry Smith 
51095fce210SBarry Smith   PetscFunctionBegin;
511*dec1416fSJunchao Zhang   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
51295fce210SBarry Smith   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
51395fce210SBarry Smith   ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
51495fce210SBarry Smith   ierr = PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&win);CHKERRQ(ierr);
51595fce210SBarry Smith   for (i=0; i<nranks; i++) {
51695fce210SBarry Smith     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);}
51795fce210SBarry Smith     ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);CHKERRQ(ierr);
51895fce210SBarry Smith     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);}
51995fce210SBarry Smith   }
52095fce210SBarry Smith   PetscFunctionReturn(0);
52195fce210SBarry Smith }
52295fce210SBarry Smith 
52395fce210SBarry Smith static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
52495fce210SBarry Smith {
52595fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
52695fce210SBarry Smith   PetscErrorCode ierr;
52795fce210SBarry Smith   MPI_Win        win;
52895fce210SBarry Smith 
52995fce210SBarry Smith   PetscFunctionBegin;
53095fce210SBarry Smith   if (!w->wins) PetscFunctionReturn(0);
53195fce210SBarry Smith   ierr = PetscSFFindWindow(sf,unit,rootdata,&win);CHKERRQ(ierr);
53295fce210SBarry Smith   ierr = MPI_Win_fence(MPI_MODE_NOSUCCEED,win);CHKERRQ(ierr);
53395fce210SBarry Smith   ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSUCCEED,&win);CHKERRQ(ierr);
53495fce210SBarry Smith   PetscFunctionReturn(0);
53595fce210SBarry Smith }
53695fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
53795fce210SBarry Smith {
53895fce210SBarry Smith   PetscErrorCode     ierr;
53995fce210SBarry Smith   PetscInt           i,nranks;
54095fce210SBarry Smith   const PetscMPIInt  *ranks;
54195fce210SBarry Smith   const MPI_Datatype *mine,*remote;
54295fce210SBarry Smith   MPI_Win            win;
54395fce210SBarry Smith 
54495fce210SBarry Smith   PetscFunctionBegin;
545*dec1416fSJunchao Zhang   ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
54695fce210SBarry Smith   ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
54795fce210SBarry Smith   ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
54895fce210SBarry Smith   ierr = PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);CHKERRQ(ierr);
54995fce210SBarry Smith   for (i=0; i<sf->nranks; i++) {
55095fce210SBarry Smith     ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,sf->ranks[i],0,win);CHKERRQ(ierr);
55195fce210SBarry Smith     ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);CHKERRQ(ierr);
55295fce210SBarry Smith     ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);CHKERRQ(ierr);
55395fce210SBarry Smith     ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);
55495fce210SBarry Smith   }
55595fce210SBarry Smith   PetscFunctionReturn(0);
55695fce210SBarry Smith }
55795fce210SBarry Smith 
55895fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
55995fce210SBarry Smith {
56095fce210SBarry Smith   PetscErrorCode ierr;
56195fce210SBarry Smith   MPI_Win        win;
56295fce210SBarry Smith 
56395fce210SBarry Smith   PetscFunctionBegin;
56495fce210SBarry Smith   ierr = PetscSFFindWindow(sf,unit,rootdata,&win);CHKERRQ(ierr);
56595fce210SBarry Smith   /* Nothing to do currently because MPI_LOCK_EXCLUSIVE is used in PetscSFFetchAndOpBegin(), rendering this implementation synchronous. */
56695fce210SBarry Smith   ierr = PetscSFRestoreWindow(sf,unit,rootdata,PETSC_FALSE,0,&win);CHKERRQ(ierr);
56795fce210SBarry Smith   PetscFunctionReturn(0);
56895fce210SBarry Smith }
56995fce210SBarry Smith 
570*dec1416fSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
57195fce210SBarry Smith {
57295fce210SBarry Smith   PetscSF_Window *w = (PetscSF_Window*)sf->data;
57395fce210SBarry Smith   PetscErrorCode ierr;
57495fce210SBarry Smith 
57595fce210SBarry Smith   PetscFunctionBegin;
57695fce210SBarry Smith   sf->ops->SetUp           = PetscSFSetUp_Window;
57795fce210SBarry Smith   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
57895fce210SBarry Smith   sf->ops->Reset           = PetscSFReset_Window;
57995fce210SBarry Smith   sf->ops->Destroy         = PetscSFDestroy_Window;
58095fce210SBarry Smith   sf->ops->View            = PetscSFView_Window;
58195fce210SBarry Smith   sf->ops->Duplicate       = PetscSFDuplicate_Window;
58295fce210SBarry Smith   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
58395fce210SBarry Smith   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
584de49d1a2SJunchao Zhang   sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Window;
585de49d1a2SJunchao Zhang   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Window;
58695fce210SBarry Smith   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
58795fce210SBarry Smith   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
58895fce210SBarry Smith   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
58995fce210SBarry Smith   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;
59095fce210SBarry Smith 
591b00a9115SJed Brown   ierr = PetscNewLog(sf,&w);CHKERRQ(ierr);
59295fce210SBarry Smith   sf->data = (void*)w;
59395fce210SBarry Smith   w->sync  = PETSCSF_WINDOW_SYNC_FENCE;
59495fce210SBarry Smith 
595bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);CHKERRQ(ierr);
596bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);CHKERRQ(ierr);
59795fce210SBarry Smith 
59895fce210SBarry Smith #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
59995fce210SBarry Smith   {
60095fce210SBarry Smith     PetscBool ackbug = PETSC_FALSE;
601c5929fdfSBarry Smith     ierr = PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);CHKERRQ(ierr);
60295fce210SBarry Smith     if (ackbug) {
603955c1f14SBarry Smith       ierr = PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");CHKERRQ(ierr);
60495fce210SBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
60595fce210SBarry Smith   }
60695fce210SBarry Smith #endif
60795fce210SBarry Smith   PetscFunctionReturn(0);
60895fce210SBarry Smith }
609